{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4c5d758d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import tempfile\n",
    "\n",
    "from datasets import load_dataset\n",
    "from detect_secrets import SecretsCollection\n",
    "from detect_secrets.settings import default_settings, transient_settings"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1b89bca8",
   "metadata": {},
   "source": [
    "## Run detect-secrets with default settings on 1k samples\n",
    "We will run detect-secrets with default settings (with all plugins and filters) on the 1K subset of the stack (the original labelling dataset) to see which detectors perform well and which don't. Then we will select a subset of plugins and filters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "330a8323",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using custom data configuration loubnabnl--pii_labeling_dataset-e7718515568813a1\n",
      "Found cached dataset parquet (/Users/loubnabenallal/.cache/huggingface/datasets/loubnabnl___parquet/loubnabnl--pii_labeling_dataset-e7718515568813a1/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['content', 'licenses', 'repository_name', 'path', 'size', 'lang'],\n",
       "    num_rows: 1000\n",
       "})"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ds = load_dataset(\"loubnabnl/pii_labeling_dataset\", split=\"train\")\n",
    "ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "81f91048",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _locate_secret(content, secret_value, line_number=0):\n",
    "    \"\"\"Return the character offset of secret_value inside content\n",
    "    Args:\n",
    "        content (str): text that was scanned\n",
    "        secret_value (str): value reported by the detector\n",
    "        line_number (int): 1-indexed line of the detection, 0 if unknown\n",
    "    Returns:\n",
    "        int: offset of the first occurrence on or after the reported line,\n",
    "            or of the first occurrence in content when the hint does not help\n",
    "    Raises:\n",
    "        ValueError: if secret_value does not occur in content\"\"\"\n",
    "\n",
    "    line_start = 0\n",
    "    if line_number and line_number > 1:\n",
    "        # str.splitlines breaks on every boundary a text-mode file read does (and a few more),\n",
    "        # so this offset never lands past the start of the reported line\n",
    "        preceding = content.splitlines(keepends=True)[:line_number - 1]\n",
    "        line_start = sum(len(line) for line in preceding)\n",
    "    start = content.find(secret_value, line_start)\n",
    "    if start == -1:\n",
    "        # hint was unusable: fall back to the first occurrence (raises if absent)\n",
    "        start = content.index(secret_value)\n",
    "    return start\n",
    "\n",
    "\n",
    "def scan_str_content(content, suffix=\".txt\"):\n",
    "    \"\"\"Detect secret keys in content\n",
    "    Args:\n",
    "        content (str): content to scan\n",
    "        suffix (str): suffix of the temporary file the content is written to\n",
    "    Returns:\n",
    "        list: one dict per secret found, with keys 'type', 'secret_value',\n",
    "            'start_index' and 'end_index' (character offsets into content)\n",
    "    Raises:\n",
    "        ValueError: if a reported secret value does not occur in content\"\"\"\n",
    "\n",
    "    # scan_file works on a path, so the content goes through a temporary file\n",
    "    fp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False, mode='w')\n",
    "    try:\n",
    "        fp.write(content)\n",
    "        fp.close()\n",
    "        secrets = SecretsCollection()\n",
    "        # default settings; use transient_settings({'plugins_used': ..., 'filters_used': ...})\n",
    "        # here instead to restrict the scan to a subset of plugins and filters\n",
    "        with default_settings():\n",
    "            secrets.scan_file(fp.name)\n",
    "    finally:\n",
    "        # delete=False leaves removal to us: clean up even if writing or scanning fails\n",
    "        fp.close()\n",
    "        os.unlink(fp.name)\n",
    "    secrets_set = list(secrets.data.values())\n",
    "    result = []\n",
    "    if secrets_set:\n",
    "        for secret in secrets_set[0]:\n",
    "            # the line number lets a repeated value resolve to the flagged occurrence\n",
    "            # instead of the first place the same text appears in the file\n",
    "            start = _locate_secret(\n",
    "                content, secret.secret_value, getattr(secret, 'line_number', 0)\n",
    "            )\n",
    "            result.append({\n",
    "                'type': secret.type,\n",
    "                'secret_value': secret.secret_value,\n",
    "                'start_index': start,\n",
    "                'end_index': start + len(secret.secret_value),\n",
    "            })\n",
    "    return result\n",
    "\n",
    "\n",
    "def scan_secrets_batch(examples):\n",
    "    \"\"\"Scan a batch of examples from a dataset for secret keys\n",
    "    This adds three columns to the dataset:\n",
    "    - secrets: (str) stringified list of the secret values found, empty string if none\n",
    "    - types: (str) stringified list of the matching secret types, empty string if none\n",
    "    - has_secrets: (bool) whether the example contains a secret\"\"\"\n",
    "\n",
    "    list_secrets = []\n",
    "    list_types = []\n",
    "    has_secrets = []\n",
    "    for text in examples[\"content\"]:\n",
    "        output = scan_str_content(text, suffix=\".txt\")\n",
    "        # every row needs a value of the same type in each column,\n",
    "        # so the lists are saved as str (empty str when nothing is found)\n",
    "        list_secrets.append(str([e['secret_value'] for e in output]) if output else \"\")\n",
    "        list_types.append(str([e['type'] for e in output]) if output else \"\")\n",
    "        has_secrets.append(bool(output))\n",
    "    return {\"secrets\": list_secrets, \"types\": list_types, \"has_secrets\": has_secrets}\n",
    "\n",
    "\n",
    "def scan_secrets_batch_viz(examples):\n",
    "    \"\"\"Collect the detailed detections of every example that contains secrets\n",
    "    Args:\n",
    "        examples: batch or dataset exposing a 'content' column\n",
    "    Returns:\n",
    "        list: one dict per example with secrets, holding the position of the\n",
    "            example ('id') and the output of scan_str_content for it ('secrets')\"\"\"\n",
    "\n",
    "    scanned = (\n",
    "        (idx, scan_str_content(sample, suffix=\".txt\"))\n",
    "        for idx, sample in enumerate(examples[\"content\"])\n",
    "    )\n",
    "    return [{\"id\": idx, \"secrets\": found} for idx, found in scanned if found]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ea08ca93",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'type': 'AWS Access Key',\n",
       "  'secret_value': 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',\n",
       "  'start_index': 71,\n",
       "  'end_index': 111},\n",
       " {'type': 'Base64 High Entropy String',\n",
       "  'secret_value': 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',\n",
       "  'start_index': 71,\n",
       "  'end_index': 111},\n",
       " {'type': 'AWS Access Key',\n",
       "  'secret_value': 'AKIAIOSFODNN7EXAMPLE',\n",
       "  'start_index': 28,\n",
       "  'end_index': 48},\n",
       " {'type': 'Base64 High Entropy String',\n",
       "  'secret_value': 'AQoEXAMPLEH4aoAH0gNCAPyJxz4BlCFFxWNE1OPTgk5TthT+FvwqnKwRcOIfrRh3c/',\n",
       "  'start_index': 130,\n",
       "  'end_index': 196},\n",
       " {'type': 'Secret Keyword',\n",
       "  'secret_value': 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',\n",
       "  'start_index': 71,\n",
       "  'end_index': 111}]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# example\n",
    "content = '''[default]\n",
    "aws_access_key_id=AKIAIOSFODNN7EXAMPLE\n",
    "aws_secret_access_key=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY\n",
    "aws_session_token=AQoEXAMPLEH4aoAH0gNCAPyJxz4BlCFFxWNE1OPTgk5TthT+FvwqnKwRcOIfrRh3c/\n",
    "sso_account_id = 123456789012\n",
    "IP http://10.0.0.0:24\n",
    "'''\n",
    "scan_str_content(content, '.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8fed57cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# use multiprocessing to scan the dataset, takes a few seconds\n",
    "ds_detect_secrets = ds.map(\n",
    "    scan_secrets_batch,\n",
    "    batched=True,\n",
    "    batch_size=10,\n",
    "    num_proc=12,\n",
    "    load_from_cache_file=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "c2a84daa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['content', 'licenses', 'repository_name', 'path', 'size', 'lang', 'secrets', 'types', 'has_secrets'],\n",
       "    num_rows: 1000\n",
       "})"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ds_detect_secrets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "fee4bc94",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading cached processed dataset at /Users/loubnabenallal/.cache/huggingface/datasets/loubnabnl___parquet/loubnabnl--pii_labeling_dataset-e7718515568813a1/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-23b64a24a2c7f4b7.arrow\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of samples with secrets:  28\n"
     ]
    }
   ],
   "source": [
    "# filter on has_secrets true\n",
    "print(\"number of samples with secrets: \", len(ds_detect_secrets.filter(lambda x: x['has_secrets'])))\n",
    "# note: the total number of secrets is higher, since one sample can contain several secrets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "586bd1fd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['', '', '', '', '', '', '', '', '', '', '', '', \"['75e5849b1a27d71e74de1390a4fc81c38b4ed8ce24d4efb2c9a5807d0e82106c']\", '', '', '', '', '', '', '', '', '', '', '', '', \"['bf2c7ce40b04ae811af714deb512510cc2c17b9ab9d6ddcf49fe4487eea7af3d', '56c932549852cddcfafdab3820b0200c7742675be92179e59e6215b340e26467']\", '', '', '', '', '', '', '', '', \"['7ab18906739e4662ac01e69f5ebb7352', 'test']\", '', '', '', '', '', '', '', \"['vmwin10', 'admin']\", \"['xx']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', \"['secret', 'test-secret-key']\", '', '', '', '', '', '', '', '', '', \"['000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', \"['PASSWORD']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 
\"['0100000090f0a9f110702f808219ebea1173056042a714bad51b916cb6800000000000005275289558f51c9966699404ae2294730c3c9f9bda53523ce50e9b95e558da2fdb261b4d4c86041b1ab1bf930901000000010000000000000000000000000000000000000000000000000000000000000000ffffffff07044c86041b0146ffffffff0100f2052a01000000434104e18f7afbe4721580e81e8414fc8c24d7cfacf254bb5c7b949450c3e997c2dc1242487a8169507b631eb3771f2b425483fb13102c4eb5d858eef260fe70fbfae0ac00000000010000000196608ccbafa16abada902780da4dc35dafd7af05fa0da08cf833575f8cf9e836000000004a493046022100dab24889213caf43ae6adc41cf1c9396c08240c199f5225acf45416330fd7dbd022100fe37900e0644bf574493a07fc5edba06dbc07c311b947520c2d514bc5725dcb401ffffffff0100f2052a010000001976a914f15d1921f52e4007b146dfa60f369ed2fc393ce288ac000000000100000001fb766c1288458c2bafcfec81e48b24d98ec706de6b8af7c4e3c29419bfacb56d000000008c493046022100f268ba165ce0ad2e6d93f089cfcd3785de5c963bb5ea6b8c1b23f1ce3e517b9f022100da7c0f21adc6c401887f2bfd1922f11d76159cbc597fbd756a23dcbb00f4d7290141042b4e8625a96127826915a5b109852636ad0da753c9e1d5606a50480cd0c40f1f8b8d898235e571fe9357d9ec842bc4bba1827daaf4de06d71844d0057707966affffffff0280969800000000001976a9146963907531db72d0ed1a0cfb471ccb63923446f388ac80d6e34c000000001976a914f0688ba1c0d1ce182c7af6741e02658c7d4dfcd388ac000000000100000002c40297f730dd7b5a99567eb8d27b78758f607507c52292d02d4031895b52f2ff010000008b483045022100f7edfd4b0aac404e5bab4fd3889e0c6c41aa8d0e6fa122316f68eddd0a65013902205b09cc8b2d56e1cd1f7f2fafd60a129ed94504c4ac7bdc67b56fe67512658b3e014104732012cb962afa90d31b25d8fb0e32c94e513ab7a17805c14ca4c3423e18b4fb5d0e676841733cb83abaf975845c9f6f2a8097b7d04f4908b18368d6fc2d68ecffffffffca5065ff9617cbcba45eb23726df6498a9b9cafed4f54cbab9d227b0035ddefb000000008a473044022068010362a13c7f9919fa832b2dee4e788f61f6f5d344a7c2a0da6ae740605658022006d1af525b9a14a35c003b78b72bd59738cd676f845d1ff3fc25049e01003614014104732012cb962afa90d31b25d8fb0e32c94e513ab7a17805c14ca4c3423e18b4fb5d0e676841733cb83abaf975845c9f6f2a8097b7d04f4908b18368d6fc2d68ecffffffff01
001ec4110200000043410469ab4181eceb28985b9b4e895c13fa5e68d85761b7eee311db5addef76fa8621865134a221bd01f28ec9999ee3e021e60766e9d1f3458c115fb28650605f11c9ac000000000100000001cdaf2f758e91c514655e2dc50633d1e4c84989f8aa90a0dbc883f0d23ed5c2fa010000008b48304502207ab51be6f12a1962ba0aaaf24a20e0b69b27a94fac5adf45aa7d2d18ffd9236102210086ae728b370e5329eead9accd880d0cb070aea0c96255fae6c4f1ddcce1fd56e014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff02404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac002d3101000000001976a9141befba0cdc1ad56529371864d9f6cb042faa06b588ac000000000100000001b4a47603e71b61bc3326efd90111bf02d2f549b067f4c4a8fa183b57a0f800cb010000008a4730440220177c37f9a505c3f1a1f0ce2da777c339bd8339ffa02c7cb41f0a5804f473c9230220585b25a2ee80eb59292e52b987dad92acb0c64eced92ed9ee105ad153cdb12d001410443bd44f683467e549dae7d20d1d79cbdb6df985c6e9c029c8d0c6cb46cc1a4d3cf7923c5021b27f7a0b562ada113bc85d5fda5a1b41e87fe6e8802817cf69996ffffffff0280651406000000001976a9145505614859643ab7b547cd7f1f5e7e2a12322d3788ac00aa0271000000001976a914ea4720a7a52fc166c55ff2298e07baf70ae67e1b88ac00000000010000000586c62cd602d219bb60edb14a3e204de0705176f9022fe49a538054fb14abb49e010000008c493046022100f2bc2aba2534becbdf062eb993853a42bbbc282083d0daf9b4b585bd401aa8c9022100b1d7fd7ee0b95600db8535bbf331b19eed8d961f7a8e54159c53675d5f69df8c014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff03ad0e58ccdac3df9dc28a218bcf6f1997b0a93306faaa4b3a28ae83447b2179010000008b483045022100be12b2937179da88599e27bb31c3525097a07cdb52422d165b3ca2f2020ffcf702200971b51f853a53d644ebae9ec8f3512e442b1bcb6c315a5b491d119d10624c83014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff2acfcab629bbc8685792603762c921580030ba144af553d271716a95089e107b010000008b4830
45022100fa579a840ac258871365dd48cd7552f96c8eea69bd00d84f05b283a0dab311e102207e3c0ee9234814cfbb1b659b83671618f45abc1326b9edcc77d552a4f2a805c0014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffffdcdc6023bbc9944a658ddc588e61eacb737ddf0a3cd24f113b5a8634c517fcd2000000008b4830450221008d6df731df5d32267954bd7d2dda2302b74c6c2a6aa5c0ca64ecbabc1af03c75022010e55c571d65da7701ae2da1956c442df81bbf076cdbac25133f99d98a9ed34c014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffffe15557cd5ce258f479dfd6dc6514edf6d7ed5b21fcfa4a038fd69f06b83ac76e010000008b483045022023b3e0ab071eb11de2eb1cc3a67261b866f86bf6867d4558165f7c8c8aca2d86022100dc6e1f53a91de3efe8f63512850811f26284b62f850c70ca73ed5de8771fb451014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff01404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac00000000010000000166d7577163c932b4f9690ca6a80b6e4eb001f0a2fa9023df5595602aae96ed8d000000008a4730440220262b42546302dfb654a229cefc86432b89628ff259dc87edd1154535b16a67e102207b4634c020a97c3e7bbd0d4d19da6aa2269ad9dded4026e896b213d73ca4b63f014104979b82d02226b3a4597523845754d44f13639e3bf2df5e82c6aab2bdc79687368b01b1ab8b19875ae3c90d661a3d0a33161dab29934edeb36aa01976be3baf8affffffff02404b4c00000000001976a9144854e695a02af0aeacb823ccbc272134561e0a1688ac40420f00000000001976a914abee93376d6b37b5c2940655a6fcaf1c8e74237988ac0000000001000000014e3f8ef2e91349a9059cb4f01e54ab2597c1387161d3da89919f7ea6acdbb371010000008c49304602210081f3183471a5ca22307c0800226f3ef9c353069e0773ac76bb580654d56aa523022100d4c56465bdc069060846f4fbf2f6b20520b2a80b08b168b31e66ddb9c694e240014104976c79848e18251612f8940875b2b08d06e6dc73b9840e8860c066b7e87432c477e9a59a453e71e6d76d5fe34058b800a098fc1740ce3012e8fc8a00c96af966ffffffff02c0e1e400000000001976a9144134e75a
6fcb6042034aab5e18570cf1f844f54788ac404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac00000000', '49c95db1e470fed04496d801c9d8fbb78155d2c7f855232c918823d2c17d0cf6']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', \"['76a914d63b766cd342e6f0f7390dd454065e4bbea26b1b88ac', '000102030405060708090A0B0C0D0E0FF0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF', '9ec8177ca0a4f7aa21ec88a324f236a4d1dce6c610812a90e16febef4603a438']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', \"['Brillio@2015']\", '', \"['ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', \"['1128ED677399F969E25D9453320B85EF3D3BA35A', '2904137A030AE2370A8CD3E068078A1D59A4F229', '5974787496DFA27A4B7FE6023473FAE930EA41DC']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', \"['1dbd57365a500dc852bb214404100124bb361a19618c7734fcf28cb932bd9630']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', \"['02a7955281885bf0f0ca4a48678848cad8dc5b328ce8bc1d4481d041c98e891ff3', '433080ff80d0d52d7f8bfffff47f00807f44f680000949b800007f7f7ff1017f']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', \"['root']\", '', '', '', '', '', '', \"['sha384-fnmOCqbTlWIlj8LyTjo7mOUStjsKC4pOpQbqyi7RrhN7udi9RwhKkMHpvLbHG9Sr', 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1', 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM', 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T', 'sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=']\", '', '', '', '', '', '', '', '', '', '', '', '', \"['37a3dc5111fe8f707ab4c132ef1dbc62', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\OriginalConstructorInvocationRequiredException', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\BadMethodCallException', '0e6d7bf4a5811bfa5cf40c5ccd6fae6a', 'b0b88a3b89caae681462c58ff19a7059', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\MethodNameAlreadyConfiguredException', '6124b4c8570aa390c21fafd04a26c69f', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\CannotUseOnlyMethodsException', 'd919fc9d5ad52cfb7f322f7fe36458ab', '786bf90caabc9e09b6ad4cc5ca8f0e30', 'e397f74f8af3b1e56166a6e99f216ee7', 'a1105708a18b76903365ca1c4aa61b02', '573214ce1ef6f18100c43e366703d73c', 'cc8e14526dc240491e17a838cb78508c', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\ConfigurableMethodsAlreadyInitializedException', '8a9dc1de0ca7e01f3e08231539562f61', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\MatchBuilderNotFoundException', 'PHPUnit\\\\\\\\\\\\\\\\TextUI\\\\\\\\\\\\\\\\XmlConfiguration\\\\\\\\\\\\\\\\MoveWhitelistExcludesToCoverage', 
'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\CannotUseAddMethodsException', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\SoapExtensionNotAvailableException', '9d3db23ca418094bcf0b641a0c9559ed', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\ClassIsFinalException', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\MatcherAlreadyRegisteredException', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\ClassAlreadyExistsException', 'def43f6c87e4f8dfd0c9e1b1bab14fe8', '25072dd6e2470089de65ae7bf11d3109', 'b6b991a57620e2fb6b2f66f03fe9ddc2', '265b4faa2b3a9766332744949e83bf97', '0d59ee240a4cd96ddbb4ff164fccea4d', 'f598d06aa772fa33d905e87be6398fb1', 'e69f7f6ee287b969198c3c9d6777bd38', 'a4a119a56e50fbb293281d9a48007e0e', 'a0edc8309cc5e1d60e3047b5df6b7052', '07d7f1a47144818725fd8d91a907ac57', '2c102faa651ef8ea5874edb585946bce', '7e9bd612cc444b3eed788ebbe46263a0', '320cde22f66dd4f5d3fd621d3e88b98f', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\ComparisonMethodDoesNotDeclareBoolReturnTypeException', 'de95e0ac670b27c84ef8c5ac41fc1b34', '3d97c8dcdfba8cb85d3b34f116bb248b', '667aeda72477189d0494fecd327c3641', '377b22b161c09ed6e5152de788ca020a', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\MethodParametersAlreadyConfiguredException', '58571171fd5812e6e447dce228f52f4d', 'e6f3bc6883e449ab367280b34158c05b', '5c70426340c07411ceee79728a2304a8', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\ReturnValueNotConfiguredException', 'PHPUnit\\\\\\\\\\\\\\\\TextUI\\\\\\\\\\\\\\\\XmlConfiguration\\\\\\\\\\\\\\\\RemoveCacheTokensAttribute', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\MethodCannotBeConfiguredException', '6e3fae29631ef280660b3cdad06f25a8', 'c7a3c339e7e14b60e06a2d7fcce9476b', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\NoChildTestSuiteException', 
'9cdd7b9056abc3081735233ba9dd9c7f', '017b24472353920ed42bb364f7653c43', '538ca81a9a966a6716601ecf48f4eaef', 'd154b49fab8e4da34fb553a2d644918c', '751a5a3f463e4be759be31748b61737c', 'd59fbae42019aedf227094ac49a46f50', '8825ede83f2f289127722d4e842cf7e8', 'PHPUnit\\\\\\\\\\\\\\\\TextUI\\\\\\\\\\\\\\\\XmlConfiguration\\\\\\\\\\\\\\\\UpdateSchemaLocationTo93', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\ActualValueIsNotAnObjectException', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\InvalidCoversTargetException', 'da94ac5d3ca7d2dbab84ce561ce72bfd', '801c31d8ed748cfa537fa45402288c95', 'b6c2870932b0250c10334a86dcb33c7f', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\MethodNameNotConfiguredException', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\Rule\\\\\\\\\\\\\\\\InvokedAtLeastCount', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\IncompatibleReturnValueException', 'f0906e6318348a765ffb6eb24e0d0938', '290dd4ba42f11019134caca05dbefe3f', 'c964ee0ededf28c96ebd9db5099ef910', 'ed962a97bd972bc82007176b647d4e36', '7b11c4dc42b3b3023073cb14e519683c', 'ec07570ca5a812141189b1fa81503674', 'e39a8b23c42d4e1452234d762b03835a', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\ConfigurableMethod', 'b067bc7112e384b61c701452d53a14a8', 'PHPUnit\\\\\\\\\\\\\\\\Framework\\\\\\\\\\\\\\\\MockObject\\\\\\\\\\\\\\\\InvalidMethodNameException', 'd02cf21124526632320d6f20b1bbf905']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', \"['sha384-IQsoLXl5PILFhosVNubq5LC7Qb9DXgDA9i+tQ8Zj3iwWAwPtgFTxbJ8NT4GN1R8p', 'sha384-AYmEC3Yw5cVb3ZcuHtOA93w35dYTsvhLPVnYs9eStHfGJvOvKxVfELGroGkvsg+p', 'sha384-cVKIPhGWiC2Al4u+LWgxfKTRIcfu0JTxR+EQDz/bgldoEyl4H0zUF0QKbrJ0EcQF', 'sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM', 'sha384-gXt9imSW0VcJVHezoNQsP+TNrjYXoGcrqBZJpry9zJt8PCQjobwmhMGaDHTASo9N']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 
'', '', '', \"['required|min:5']\", '', \"['sha512-nwpMzLYxfwDnu68Rt9PqLqgVtHkIJxEPrlu3PfTfLQKVgBAlTKDmim1JvCGNyNRtyvCx1nNIVBfYm8UZotWd4Q==']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', \"['eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiJ0a2VlbCIsImV4cCI6MTY0NDgyMzA2Miwic3ViIjoidXNyLTMzNzM3OTQ1YzJiNzE4ZGI0YzMwOWQ2MzNkMmYifQ.', 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiJ0a2VlbCIsImV4cCI6MTY0NDQwNDAzMSwic3ViIjoidXNyLTMzNzM3OTQ1YzJiNzE4ZGI0YzMwOWQ2MzNkMmYifQ.', 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiJ0a2VlbCIsImV4cCI6MTY0NDQwMDAzMiwic3ViIjoidXNyLTMzNzM3OTQ1YzJiNzE4ZGI0YzMwOWQ2MzNkMmYifQ.', 'OTY0ZTkxM2UtYjA3OC0zNzM0LWJlYTAtODdiZDY4YzRjMGM4']\", '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', \"['aebb1a098a77f6e9477c5f426b363895d2f0cc77c46a3d84c871a9fab2f08d54', 'eb75dda827c656a33be6e60f18b3943c4dd4252205e557ec95d1cf44df8e3a35', '51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274', '17e976657779458a4b7aebd420a0916fa7f2746e6530602ff13a55cf81469462', 'd6edae2002e2df530bd14e8cb27eb6dce1a29fe15b5ec614d9c3b7610fe00d96', '098a3b74c65c030729fad809210f1e31d96a2295610376989134be981f4fcc30', '2998837ff866b54bdf6cdaf923417cc98a2dbe7fa831142c092ebb724694fa42']\", '', '', '', '', '', '', '', \"['nil']\", '', '', '', '', '', '', 
\"['71f05afc51e3d9b03376b2f98fd452d3a274d595']\", '', '', \"['7f6c8dc83d77134b5a3a1c53f1202b395b04482b']\", '', '', '', '', '', '', '', \"['xoxb-3242497751-XHFJhTNa87987dhADff7873A', 'secret']\", '', '', '', '', '', \"['password']\", '', '', '', '', '', '', '']\n"
     ]
    }
   ],
   "source": [
    "print(ds_detect_secrets[\"secrets\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "d709086b",
   "metadata": {},
   "outputs": [],
   "source": [
    "result = scan_secrets_batch_viz(ds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "374e4fef",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'id': 12,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '75e5849b1a27d71e74de1390a4fc81c38b4ed8ce24d4efb2c9a5807d0e82106c',\n",
       "    'start_index': 2449,\n",
       "    'end_index': 2513}]},\n",
       " {'id': 25,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '56c932549852cddcfafdab3820b0200c7742675be92179e59e6215b340e26467',\n",
       "    'start_index': 545,\n",
       "    'end_index': 609},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'bf2c7ce40b04ae811af714deb512510cc2c17b9ab9d6ddcf49fe4487eea7af3d',\n",
       "    'start_index': 194,\n",
       "    'end_index': 258}]},\n",
       " {'id': 34,\n",
       "  'secrets': [{'type': 'Secret Keyword',\n",
       "    'secret_value': 'test',\n",
       "    'start_index': 996,\n",
       "    'end_index': 1000},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '7ab18906739e4662ac01e69f5ebb7352',\n",
       "    'start_index': 1168,\n",
       "    'end_index': 1200}]},\n",
       " {'id': 42,\n",
       "  'secrets': [{'type': 'Secret Keyword',\n",
       "    'secret_value': 'admin',\n",
       "    'start_index': 175,\n",
       "    'end_index': 180},\n",
       "   {'type': 'Secret Keyword',\n",
       "    'secret_value': 'vmwin10',\n",
       "    'start_index': 430,\n",
       "    'end_index': 437}]},\n",
       " {'id': 43,\n",
       "  'secrets': [{'type': 'Secret Keyword',\n",
       "    'secret_value': 'xx',\n",
       "    'start_index': 238,\n",
       "    'end_index': 240}]},\n",
       " {'id': 74,\n",
       "  'secrets': [{'type': 'Secret Keyword',\n",
       "    'secret_value': 'secret',\n",
       "    'start_index': 4444,\n",
       "    'end_index': 4450},\n",
       "   {'type': 'Secret Keyword',\n",
       "    'secret_value': 'test-secret-key',\n",
       "    'start_index': 4439,\n",
       "    'end_index': 4454}]},\n",
       " {'id': 84,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f',\n",
       "    'start_index': 6655,\n",
       "    'end_index': 6719}]},\n",
       " {'id': 140,\n",
       "  'secrets': [{'type': 'Secret Keyword',\n",
       "    'secret_value': 'PASSWORD',\n",
       "    'start_index': 865,\n",
       "    'end_index': 873}]},\n",
       " {'id': 320,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '0100000090f0a9f110702f808219ebea1173056042a714bad51b916cb6800000000000005275289558f51c9966699404ae2294730c3c9f9bda53523ce50e9b95e558da2fdb261b4d4c86041b1ab1bf930901000000010000000000000000000000000000000000000000000000000000000000000000ffffffff07044c86041b0146ffffffff0100f2052a01000000434104e18f7afbe4721580e81e8414fc8c24d7cfacf254bb5c7b949450c3e997c2dc1242487a8169507b631eb3771f2b425483fb13102c4eb5d858eef260fe70fbfae0ac00000000010000000196608ccbafa16abada902780da4dc35dafd7af05fa0da08cf833575f8cf9e836000000004a493046022100dab24889213caf43ae6adc41cf1c9396c08240c199f5225acf45416330fd7dbd022100fe37900e0644bf574493a07fc5edba06dbc07c311b947520c2d514bc5725dcb401ffffffff0100f2052a010000001976a914f15d1921f52e4007b146dfa60f369ed2fc393ce288ac000000000100000001fb766c1288458c2bafcfec81e48b24d98ec706de6b8af7c4e3c29419bfacb56d000000008c493046022100f268ba165ce0ad2e6d93f089cfcd3785de5c963bb5ea6b8c1b23f1ce3e517b9f022100da7c0f21adc6c401887f2bfd1922f11d76159cbc597fbd756a23dcbb00f4d7290141042b4e8625a96127826915a5b109852636ad0da753c9e1d5606a50480cd0c40f1f8b8d898235e571fe9357d9ec842bc4bba1827daaf4de06d71844d0057707966affffffff0280969800000000001976a9146963907531db72d0ed1a0cfb471ccb63923446f388ac80d6e34c000000001976a914f0688ba1c0d1ce182c7af6741e02658c7d4dfcd388ac000000000100000002c40297f730dd7b5a99567eb8d27b78758f607507c52292d02d4031895b52f2ff010000008b483045022100f7edfd4b0aac404e5bab4fd3889e0c6c41aa8d0e6fa122316f68eddd0a65013902205b09cc8b2d56e1cd1f7f2fafd60a129ed94504c4ac7bdc67b56fe67512658b3e014104732012cb962afa90d31b25d8fb0e32c94e513ab7a17805c14ca4c3423e18b4fb5d0e676841733cb83abaf975845c9f6f2a8097b7d04f4908b18368d6fc2d68ecffffffffca5065ff9617cbcba45eb23726df6498a9b9cafed4f54cbab9d227b0035ddefb000000008a473044022068010362a13c7f9919fa832b2dee4e788f61f6f5d344a7c2a0da6ae740605658022006d1af525b9a14a35c003b78b72bd59738cd676f845d1ff3fc25049e01003614014104732012cb962afa90d31b25d8fb0e32c94e513ab7a17805c14ca4c3423e18b4fb5d0e676841733cb83abaf975845c9f6f2a8097b7d04f4908b
18368d6fc2d68ecffffffff01001ec4110200000043410469ab4181eceb28985b9b4e895c13fa5e68d85761b7eee311db5addef76fa8621865134a221bd01f28ec9999ee3e021e60766e9d1f3458c115fb28650605f11c9ac000000000100000001cdaf2f758e91c514655e2dc50633d1e4c84989f8aa90a0dbc883f0d23ed5c2fa010000008b48304502207ab51be6f12a1962ba0aaaf24a20e0b69b27a94fac5adf45aa7d2d18ffd9236102210086ae728b370e5329eead9accd880d0cb070aea0c96255fae6c4f1ddcce1fd56e014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff02404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac002d3101000000001976a9141befba0cdc1ad56529371864d9f6cb042faa06b588ac000000000100000001b4a47603e71b61bc3326efd90111bf02d2f549b067f4c4a8fa183b57a0f800cb010000008a4730440220177c37f9a505c3f1a1f0ce2da777c339bd8339ffa02c7cb41f0a5804f473c9230220585b25a2ee80eb59292e52b987dad92acb0c64eced92ed9ee105ad153cdb12d001410443bd44f683467e549dae7d20d1d79cbdb6df985c6e9c029c8d0c6cb46cc1a4d3cf7923c5021b27f7a0b562ada113bc85d5fda5a1b41e87fe6e8802817cf69996ffffffff0280651406000000001976a9145505614859643ab7b547cd7f1f5e7e2a12322d3788ac00aa0271000000001976a914ea4720a7a52fc166c55ff2298e07baf70ae67e1b88ac00000000010000000586c62cd602d219bb60edb14a3e204de0705176f9022fe49a538054fb14abb49e010000008c493046022100f2bc2aba2534becbdf062eb993853a42bbbc282083d0daf9b4b585bd401aa8c9022100b1d7fd7ee0b95600db8535bbf331b19eed8d961f7a8e54159c53675d5f69df8c014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff03ad0e58ccdac3df9dc28a218bcf6f1997b0a93306faaa4b3a28ae83447b2179010000008b483045022100be12b2937179da88599e27bb31c3525097a07cdb52422d165b3ca2f2020ffcf702200971b51f853a53d644ebae9ec8f3512e442b1bcb6c315a5b491d119d10624c83014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff2acfcab629bbc8685792603762c921580030ba144af553d271716
a95089e107b010000008b483045022100fa579a840ac258871365dd48cd7552f96c8eea69bd00d84f05b283a0dab311e102207e3c0ee9234814cfbb1b659b83671618f45abc1326b9edcc77d552a4f2a805c0014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffffdcdc6023bbc9944a658ddc588e61eacb737ddf0a3cd24f113b5a8634c517fcd2000000008b4830450221008d6df731df5d32267954bd7d2dda2302b74c6c2a6aa5c0ca64ecbabc1af03c75022010e55c571d65da7701ae2da1956c442df81bbf076cdbac25133f99d98a9ed34c014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffffe15557cd5ce258f479dfd6dc6514edf6d7ed5b21fcfa4a038fd69f06b83ac76e010000008b483045022023b3e0ab071eb11de2eb1cc3a67261b866f86bf6867d4558165f7c8c8aca2d86022100dc6e1f53a91de3efe8f63512850811f26284b62f850c70ca73ed5de8771fb451014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff01404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac00000000010000000166d7577163c932b4f9690ca6a80b6e4eb001f0a2fa9023df5595602aae96ed8d000000008a4730440220262b42546302dfb654a229cefc86432b89628ff259dc87edd1154535b16a67e102207b4634c020a97c3e7bbd0d4d19da6aa2269ad9dded4026e896b213d73ca4b63f014104979b82d02226b3a4597523845754d44f13639e3bf2df5e82c6aab2bdc79687368b01b1ab8b19875ae3c90d661a3d0a33161dab29934edeb36aa01976be3baf8affffffff02404b4c00000000001976a9144854e695a02af0aeacb823ccbc272134561e0a1688ac40420f00000000001976a914abee93376d6b37b5c2940655a6fcaf1c8e74237988ac0000000001000000014e3f8ef2e91349a9059cb4f01e54ab2597c1387161d3da89919f7ea6acdbb371010000008c49304602210081f3183471a5ca22307c0800226f3ef9c353069e0773ac76bb580654d56aa523022100d4c56465bdc069060846f4fbf2f6b20520b2a80b08b168b31e66ddb9c694e240014104976c79848e18251612f8940875b2b08d06e6dc73b9840e8860c066b7e87432c477e9a59a453e71e6d76d5fe34058b800a098fc1740ce3012e8fc8a00c96af966ffffffff02c0e1e40
0000000001976a9144134e75a6fcb6042034aab5e18570cf1f844f54788ac404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac00000000',\n",
       "    'start_index': 9759,\n",
       "    'end_index': 15867},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '49c95db1e470fed04496d801c9d8fbb78155d2c7f855232c918823d2c17d0cf6',\n",
       "    'start_index': 7788,\n",
       "    'end_index': 7852}]},\n",
       " {'id': 370,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '76a914d63b766cd342e6f0f7390dd454065e4bbea26b1b88ac',\n",
       "    'start_index': 39483,\n",
       "    'end_index': 39533},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '9ec8177ca0a4f7aa21ec88a324f236a4d1dce6c610812a90e16febef4603a438',\n",
       "    'start_index': 39397,\n",
       "    'end_index': 39461},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '000102030405060708090A0B0C0D0E0FF0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF',\n",
       "    'start_index': 3685,\n",
       "    'end_index': 3749}]},\n",
       " {'id': 414,\n",
       "  'secrets': [{'type': 'Secret Keyword',\n",
       "    'secret_value': 'Brillio@2015',\n",
       "    'start_index': 1212,\n",
       "    'end_index': 1224}]},\n",
       " {'id': 416,\n",
       "  'secrets': [{'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=',\n",
       "    'start_index': 736691,\n",
       "    'end_index': 736756}]},\n",
       " {'id': 445,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '2904137A030AE2370A8CD3E068078A1D59A4F229',\n",
       "    'start_index': 749,\n",
       "    'end_index': 789},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '5974787496DFA27A4B7FE6023473FAE930EA41DC',\n",
       "    'start_index': 918,\n",
       "    'end_index': 958},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '1128ED677399F969E25D9453320B85EF3D3BA35A',\n",
       "    'start_index': 40,\n",
       "    'end_index': 80}]},\n",
       " {'id': 548,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '1dbd57365a500dc852bb214404100124bb361a19618c7734fcf28cb932bd9630',\n",
       "    'start_index': 1102,\n",
       "    'end_index': 1166}]},\n",
       " {'id': 588,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '02a7955281885bf0f0ca4a48678848cad8dc5b328ce8bc1d4481d041c98e891ff3',\n",
       "    'start_index': 2103,\n",
       "    'end_index': 2169},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '433080ff80d0d52d7f8bfffff47f00807f44f680000949b800007f7f7ff1017f',\n",
       "    'start_index': 1612,\n",
       "    'end_index': 1676}]},\n",
       " {'id': 703,\n",
       "  'secrets': [{'type': 'Secret Keyword',\n",
       "    'secret_value': 'root',\n",
       "    'start_index': 113,\n",
       "    'end_index': 117}]},\n",
       " {'id': 710,\n",
       "  'secrets': [{'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=',\n",
       "    'start_index': 4091,\n",
       "    'end_index': 4142},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM',\n",
       "    'start_index': 4509,\n",
       "    'end_index': 4580},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T',\n",
       "    'start_index': 359,\n",
       "    'end_index': 430},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1',\n",
       "    'start_index': 4289,\n",
       "    'end_index': 4360},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-fnmOCqbTlWIlj8LyTjo7mOUStjsKC4pOpQbqyi7RrhN7udi9RwhKkMHpvLbHG9Sr',\n",
       "    'start_index': 566,\n",
       "    'end_index': 637}]},\n",
       " {'id': 723,\n",
       "  'secrets': [{'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\SoapExtensionNotAvailableException',\n",
       "    'start_index': 557089,\n",
       "    'end_index': 557155},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\ClassIsFinalException',\n",
       "    'start_index': 550227,\n",
       "    'end_index': 550280},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '801c31d8ed748cfa537fa45402288c95',\n",
       "    'start_index': 2384,\n",
       "    'end_index': 2416},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '265b4faa2b3a9766332744949e83bf97',\n",
       "    'start_index': 5242,\n",
       "    'end_index': 5274},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'ec07570ca5a812141189b1fa81503674',\n",
       "    'start_index': 292,\n",
       "    'end_index': 324},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'e69f7f6ee287b969198c3c9d6777bd38',\n",
       "    'start_index': 630,\n",
       "    'end_index': 662},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'a4a119a56e50fbb293281d9a48007e0e',\n",
       "    'start_index': 187,\n",
       "    'end_index': 219},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'e397f74f8af3b1e56166a6e99f216ee7',\n",
       "    'start_index': 4239,\n",
       "    'end_index': 4271},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '07d7f1a47144818725fd8d91a907ac57',\n",
       "    'start_index': 3012,\n",
       "    'end_index': 3044},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '2c102faa651ef8ea5874edb585946bce',\n",
       "    'start_index': 2269,\n",
       "    'end_index': 2301},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'a1105708a18b76903365ca1c4aa61b02',\n",
       "    'start_index': 2575,\n",
       "    'end_index': 2607},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '573214ce1ef6f18100c43e366703d73c',\n",
       "    'start_index': 5955,\n",
       "    'end_index': 5987},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '7e9bd612cc444b3eed788ebbe46263a0',\n",
       "    'start_index': 2687,\n",
       "    'end_index': 2719},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'd02cf21124526632320d6f20b1bbf905',\n",
       "    'start_index': 3967,\n",
       "    'end_index': 3999},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'de95e0ac670b27c84ef8c5ac41fc1b34',\n",
       "    'start_index': 3701,\n",
       "    'end_index': 3733},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'cc8e14526dc240491e17a838cb78508c',\n",
       "    'start_index': 4823,\n",
       "    'end_index': 4855},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\ConfigurableMethodsAlreadyInitializedException',\n",
       "    'start_index': 550543,\n",
       "    'end_index': 550621},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '37a3dc5111fe8f707ab4c132ef1dbc62',\n",
       "    'start_index': 1842,\n",
       "    'end_index': 1874},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '320cde22f66dd4f5d3fd621d3e88b98f',\n",
       "    'start_index': 1074,\n",
       "    'end_index': 1106},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '667aeda72477189d0494fecd327c3641',\n",
       "    'start_index': 958,\n",
       "    'end_index': 990},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '377b22b161c09ed6e5152de788ca020a',\n",
       "    'start_index': 6387,\n",
       "    'end_index': 6419},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MethodParametersAlreadyConfiguredException',\n",
       "    'start_index': 553209,\n",
       "    'end_index': 553283},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MethodNameAlreadyConfiguredException',\n",
       "    'start_index': 552671,\n",
       "    'end_index': 552739},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'e6f3bc6883e449ab367280b34158c05b',\n",
       "    'start_index': 3420,\n",
       "    'end_index': 3452},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\ClassAlreadyExistsException',\n",
       "    'start_index': 550049,\n",
       "    'end_index': 550108},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '25072dd6e2470089de65ae7bf11d3109',\n",
       "    'start_index': 745,\n",
       "    'end_index': 777},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '6e3fae29631ef280660b3cdad06f25a8',\n",
       "    'start_index': 519,\n",
       "    'end_index': 551},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'c7a3c339e7e14b60e06a2d7fcce9476b',\n",
       "    'start_index': 5367,\n",
       "    'end_index': 5399},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\NoChildTestSuiteException',\n",
       "    'start_index': 559526,\n",
       "    'end_index': 559571},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '9cdd7b9056abc3081735233ba9dd9c7f',\n",
       "    'start_index': 5734,\n",
       "    'end_index': 5766},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '538ca81a9a966a6716601ecf48f4eaef',\n",
       "    'start_index': 2480,\n",
       "    'end_index': 2512},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '0d59ee240a4cd96ddbb4ff164fccea4d',\n",
       "    'start_index': 1179,\n",
       "    'end_index': 1211},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'f598d06aa772fa33d905e87be6398fb1',\n",
       "    'start_index': 850,\n",
       "    'end_index': 882},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'a0edc8309cc5e1d60e3047b5df6b7052',\n",
       "    'start_index': 1398,\n",
       "    'end_index': 1430},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '8825ede83f2f289127722d4e842cf7e8',\n",
       "    'start_index': 1622,\n",
       "    'end_index': 1654},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'd59fbae42019aedf227094ac49a46f50',\n",
       "    'start_index': 3567,\n",
       "    'end_index': 3599},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\TextUI\\\\\\\\XmlConfiguration\\\\\\\\UpdateSchemaLocationTo93',\n",
       "    'start_index': 580915,\n",
       "    'end_index': 580974},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\InvalidCoversTargetException',\n",
       "    'start_index': 547829,\n",
       "    'end_index': 547877},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\Rule\\\\\\\\InvokedAtLeastCount',\n",
       "    'start_index': 555856,\n",
       "    'end_index': 555913},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'f0906e6318348a765ffb6eb24e0d0938',\n",
       "    'start_index': 5489,\n",
       "    'end_index': 5521},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\ComparisonMethodDoesNotDeclareBoolReturnTypeException',\n",
       "    'start_index': 537090,\n",
       "    'end_index': 537163},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '290dd4ba42f11019134caca05dbefe3f',\n",
       "    'start_index': 6070,\n",
       "    'end_index': 6102},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'c964ee0ededf28c96ebd9db5099ef910',\n",
       "    'start_index': 1508,\n",
       "    'end_index': 1540},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'ed962a97bd972bc82007176b647d4e36',\n",
       "    'start_index': 6175,\n",
       "    'end_index': 6207},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\CannotUseAddMethodsException',\n",
       "    'start_index': 549687,\n",
       "    'end_index': 549747},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '9d3db23ca418094bcf0b641a0c9559ed',\n",
       "    'start_index': 4528,\n",
       "    'end_index': 4560},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'e39a8b23c42d4e1452234d762b03835a',\n",
       "    'start_index': 2809,\n",
       "    'end_index': 2841},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MatcherAlreadyRegisteredException',\n",
       "    'start_index': 552161,\n",
       "    'end_index': 552226},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'def43f6c87e4f8dfd0c9e1b1bab14fe8',\n",
       "    'start_index': 2062,\n",
       "    'end_index': 2094},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '5c70426340c07411ceee79728a2304a8',\n",
       "    'start_index': 2907,\n",
       "    'end_index': 2939},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'b6b991a57620e2fb6b2f66f03fe9ddc2',\n",
       "    'start_index': 1735,\n",
       "    'end_index': 1767},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\ReturnValueNotConfiguredException',\n",
       "    'start_index': 554885,\n",
       "    'end_index': 554950},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\ConfigurableMethod',\n",
       "    'start_index': 550393,\n",
       "    'end_index': 550443},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'b067bc7112e384b61c701452d53a14a8',\n",
       "    'start_index': 1954,\n",
       "    'end_index': 1986},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MethodCannotBeConfiguredException',\n",
       "    'start_index': 552481,\n",
       "    'end_index': 552546},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\OriginalConstructorInvocationRequiredException',\n",
       "    'start_index': 554507,\n",
       "    'end_index': 554585},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\BadMethodCallException',\n",
       "    'start_index': 548583,\n",
       "    'end_index': 548637},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '751a5a3f463e4be759be31748b61737c',\n",
       "    'start_index': 5103,\n",
       "    'end_index': 5135},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '0e6d7bf4a5811bfa5cf40c5ccd6fae6a',\n",
       "    'start_index': 411,\n",
       "    'end_index': 443},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'b0b88a3b89caae681462c58ff19a7059',\n",
       "    'start_index': 4682,\n",
       "    'end_index': 4714},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\ActualValueIsNotAnObjectException',\n",
       "    'start_index': 536333,\n",
       "    'end_index': 536386},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'b6c2870932b0250c10334a86dcb33c7f',\n",
       "    'start_index': 3830,\n",
       "    'end_index': 3862},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MethodNameNotConfiguredException',\n",
       "    'start_index': 553021,\n",
       "    'end_index': 553085},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\IncompatibleReturnValueException',\n",
       "    'start_index': 551205,\n",
       "    'end_index': 551269},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'da94ac5d3ca7d2dbab84ce561ce72bfd',\n",
       "    'start_index': 3145,\n",
       "    'end_index': 3177},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '6124b4c8570aa390c21fafd04a26c69f',\n",
       "    'start_index': 5842,\n",
       "    'end_index': 5874},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '3d97c8dcdfba8cb85d3b34f116bb248b',\n",
       "    'start_index': 3283,\n",
       "    'end_index': 3315},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\CannotUseOnlyMethodsException',\n",
       "    'start_index': 549867,\n",
       "    'end_index': 549928},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '58571171fd5812e6e447dce228f52f4d',\n",
       "    'start_index': 5613,\n",
       "    'end_index': 5645},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '7b11c4dc42b3b3023073cb14e519683c',\n",
       "    'start_index': 1284,\n",
       "    'end_index': 1316},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'd919fc9d5ad52cfb7f322f7fe36458ab',\n",
       "    'start_index': 4099,\n",
       "    'end_index': 4131},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '786bf90caabc9e09b6ad4cc5ca8f0e30',\n",
       "    'start_index': 4959,\n",
       "    'end_index': 4991},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\TextUI\\\\\\\\XmlConfiguration\\\\\\\\RemoveCacheTokensAttribute',\n",
       "    'start_index': 578860,\n",
       "    'end_index': 578921},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '017b24472353920ed42bb364f7653c43',\n",
       "    'start_index': 6275,\n",
       "    'end_index': 6307},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'd154b49fab8e4da34fb553a2d644918c',\n",
       "    'start_index': 4384,\n",
       "    'end_index': 4416},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '8a9dc1de0ca7e01f3e08231539562f61',\n",
       "    'start_index': 2167,\n",
       "    'end_index': 2199},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MatchBuilderNotFoundException',\n",
       "    'start_index': 551851,\n",
       "    'end_index': 551912},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\TextUI\\\\\\\\XmlConfiguration\\\\\\\\MoveWhitelistExcludesToCoverage',\n",
       "    'start_index': 578241,\n",
       "    'end_index': 578307},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\InvalidMethodNameException',\n",
       "    'start_index': 551393,\n",
       "    'end_index': 551451}]},\n",
       " {'id': 739,\n",
       "  'secrets': [{'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-IQsoLXl5PILFhosVNubq5LC7Qb9DXgDA9i+tQ8Zj3iwWAwPtgFTxbJ8NT4GN1R8p',\n",
       "    'start_index': 8678,\n",
       "    'end_index': 8749},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM',\n",
       "    'start_index': 8460,\n",
       "    'end_index': 8531},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-AYmEC3Yw5cVb3ZcuHtOA93w35dYTsvhLPVnYs9eStHfGJvOvKxVfELGroGkvsg+p',\n",
       "    'start_index': 418,\n",
       "    'end_index': 489},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-gXt9imSW0VcJVHezoNQsP+TNrjYXoGcrqBZJpry9zJt8PCQjobwmhMGaDHTASo9N',\n",
       "    'start_index': 642,\n",
       "    'end_index': 713},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-cVKIPhGWiC2Al4u+LWgxfKTRIcfu0JTxR+EQDz/bgldoEyl4H0zUF0QKbrJ0EcQF',\n",
       "    'start_index': 8884,\n",
       "    'end_index': 8955}]},\n",
       " {'id': 759,\n",
       "  'secrets': [{'type': 'Secret Keyword',\n",
       "    'secret_value': 'required|min:5',\n",
       "    'start_index': 1498,\n",
       "    'end_index': 1512}]},\n",
       " {'id': 761,\n",
       "  'secrets': [{'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha512-nwpMzLYxfwDnu68Rt9PqLqgVtHkIJxEPrlu3PfTfLQKVgBAlTKDmim1JvCGNyNRtyvCx1nNIVBfYm8UZotWd4Q==',\n",
       "    'start_index': 713,\n",
       "    'end_index': 808}]},\n",
       " {'id': 899,\n",
       "  'secrets': [{'type': 'JSON Web Token',\n",
       "    'secret_value': 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiJ0a2VlbCIsImV4cCI6MTY0NDgyMzA2Miwic3ViIjoidXNyLTMzNzM3OTQ1YzJiNzE4ZGI0YzMwOWQ2MzNkMmYifQ.',\n",
       "    'start_index': 6746,\n",
       "    'end_index': 6882},\n",
       "   {'type': 'JSON Web Token',\n",
       "    'secret_value': 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiJ0a2VlbCIsImV4cCI6MTY0NDQwMDAzMiwic3ViIjoidXNyLTMzNzM3OTQ1YzJiNzE4ZGI0YzMwOWQ2MzNkMmYifQ.',\n",
       "    'start_index': 1555,\n",
       "    'end_index': 1691},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'OTY0ZTkxM2UtYjA3OC0zNzM0LWJlYTAtODdiZDY4YzRjMGM4',\n",
       "    'start_index': 3516,\n",
       "    'end_index': 3564},\n",
       "   {'type': 'JSON Web Token',\n",
       "    'secret_value': 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiJ0a2VlbCIsImV4cCI6MTY0NDQwNDAzMSwic3ViIjoidXNyLTMzNzM3OTQ1YzJiNzE4ZGI0YzMwOWQ2MzNkMmYifQ.',\n",
       "    'start_index': 4620,\n",
       "    'end_index': 4756}]},\n",
       " {'id': 960,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'd6edae2002e2df530bd14e8cb27eb6dce1a29fe15b5ec614d9c3b7610fe00d96',\n",
       "    'start_index': 555,\n",
       "    'end_index': 619},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274',\n",
       "    'start_index': 1027,\n",
       "    'end_index': 1091},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'aebb1a098a77f6e9477c5f426b363895d2f0cc77c46a3d84c871a9fab2f08d54',\n",
       "    'start_index': 644,\n",
       "    'end_index': 708},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '2998837ff866b54bdf6cdaf923417cc98a2dbe7fa831142c092ebb724694fa42',\n",
       "    'start_index': 737,\n",
       "    'end_index': 801},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '17e976657779458a4b7aebd420a0916fa7f2746e6530602ff13a55cf81469462',\n",
       "    'start_index': 372,\n",
       "    'end_index': 436},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'eb75dda827c656a33be6e60f18b3943c4dd4252205e557ec95d1cf44df8e3a35',\n",
       "    'start_index': 249,\n",
       "    'end_index': 313},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '098a3b74c65c030729fad809210f1e31d96a2295610376989134be981f4fcc30',\n",
       "    'start_index': 461,\n",
       "    'end_index': 525}]},\n",
       " {'id': 968,\n",
       "  'secrets': [{'type': 'Secret Keyword',\n",
       "    'secret_value': 'nil',\n",
       "    'start_index': 206,\n",
       "    'end_index': 209}]},\n",
       " {'id': 975,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '71f05afc51e3d9b03376b2f98fd452d3a274d595',\n",
       "    'start_index': 171,\n",
       "    'end_index': 211}]},\n",
       " {'id': 978,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '7f6c8dc83d77134b5a3a1c53f1202b395b04482b',\n",
       "    'start_index': 7737,\n",
       "    'end_index': 7777}]},\n",
       " {'id': 986,\n",
       "  'secrets': [{'type': 'Slack Token',\n",
       "    'secret_value': 'xoxb-3242497751-XHFJhTNa87987dhADff7873A',\n",
       "    'start_index': 934,\n",
       "    'end_index': 974},\n",
       "   {'type': 'Secret Keyword',\n",
       "    'secret_value': 'secret',\n",
       "    'start_index': 893,\n",
       "    'end_index': 899}]},\n",
       " {'id': 992,\n",
       "  'secrets': [{'type': 'Basic Auth Credentials',\n",
       "    'secret_value': 'password',\n",
       "    'start_index': 68,\n",
       "    'end_index': 76}]}]"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "eeb946e2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting gibberish-detector\n",
      "  Downloading gibberish_detector-0.1.1-py3-none-any.whl (10 kB)\n",
      "Installing collected packages: gibberish-detector\n",
      "Successfully installed gibberish-detector-0.1.1\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m22.3\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "# Install the gibberish detector used below to filter false positives.\n",
    "# %pip targets the running kernel's environment; the version is pinned to the one this notebook was run with.\n",
    "%pip install gibberish-detector==0.1.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "710a9654",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False\n"
     ]
    }
   ],
   "source": [
    "from gibberish_detector import detector\n",
    "\n",
    "# One-time setup before running this cell:\n",
    "#   1. pip install gibberish-detector\n",
    "#   2. download the training corpus from https://raw.githubusercontent.com/domanchi/gibberish-detector/master/examples/big.txt\n",
    "#   3. run `gibberish-detector train big.txt > big.model` to generate the model (it takes about 3 seconds)\n",
    "model_path = 'big.model'\n",
    "Detector = detector.create_from_model(model_path)\n",
    "\n",
    "# Sanity check of the loaded model on a sample string\n",
    "print(Detector.is_gibberish('//password'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6358d0b7",
   "metadata": {},
   "source": [
    "### Analysis of the results\n",
    "Looking at the samples it seems like most of them are detected with:\n",
    "* Hex High Entropy String, some other regex detectors like JSON Web Token, and these work well.\n",
    "* Many are also detected with Base64 High Entropy String, but this has some false positives, like paths from one file. We will see that the **Gibberish-detector** can filter them out. \n",
    "* Then many are detected with the Secret Keyword Detector and most of them aren't secrets; this is expected according to the docs, since it looks for variable names that are often associated with secrets. \n",
    "* There is also one detection with Basic Auth Credentials, but this is a false positive (same was observed in codeparrot data scanning)\n",
    "\n",
    "=> We will keep Hex High Entropy String and Base64 High Entropy String with the Gibberish detector on top, and remove Secret Keyword Detector and Basic Auth Credentials. We also keep the other regex-based detectors, though we still need to test how much they detect on a larger dataset. Finally, we remove [PrivateKeyDetector](https://github.com/Yelp/detect-secrets/blob/6bf879011cea4d280daee08a89bdc1002fd4fc53/detect_secrets/plugins/private_key.py), since it checks for keywords too.\n",
    "\n",
    "Note: Later we try changing the limit parameter of the BASE64 entropy detector, but it doesn't help"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "d8678aef",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "total number of detected secrets in 28 files (among 1k) is: 134\n"
     ]
    }
   ],
   "source": [
    "# number of files with at least one detection, and detections over all files\n",
    "n_files_default = len(result)\n",
    "n_secrets_default = sum(len(e['secrets']) for e in result)\n",
    "print(f\"total number of detected secrets in {n_files_default} files (among 1k) is: {n_secrets_default}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "18d4eeec",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "non gibberish secrets\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\SoapExtensionNotAvailableException\n",
      " id:723 start_index:557089\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\ClassIsFinalException\n",
      " id:723 start_index:550227\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\ConfigurableMethodsAlreadyInitializedException\n",
      " id:723 start_index:550543\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\MethodParametersAlreadyConfiguredException\n",
      " id:723 start_index:553209\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\MethodNameAlreadyConfiguredException\n",
      " id:723 start_index:552671\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\ClassAlreadyExistsException\n",
      " id:723 start_index:550049\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\NoChildTestSuiteException\n",
      " id:723 start_index:559526\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\TextUI\\\\XmlConfiguration\\\\UpdateSchemaLocationTo93\n",
      " id:723 start_index:580915\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\InvalidCoversTargetException\n",
      " id:723 start_index:547829\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\Rule\\\\InvokedAtLeastCount\n",
      " id:723 start_index:555856\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\ComparisonMethodDoesNotDeclareBoolReturnTypeException\n",
      " id:723 start_index:537090\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\CannotUseAddMethodsException\n",
      " id:723 start_index:549687\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\MatcherAlreadyRegisteredException\n",
      " id:723 start_index:552161\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\ReturnValueNotConfiguredException\n",
      " id:723 start_index:554885\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\ConfigurableMethod\n",
      " id:723 start_index:550393\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\MethodCannotBeConfiguredException\n",
      " id:723 start_index:552481\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\OriginalConstructorInvocationRequiredException\n",
      " id:723 start_index:554507\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\BadMethodCallException\n",
      " id:723 start_index:548583\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\ActualValueIsNotAnObjectException\n",
      " id:723 start_index:536333\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\MethodNameNotConfiguredException\n",
      " id:723 start_index:553021\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\IncompatibleReturnValueException\n",
      " id:723 start_index:551205\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\CannotUseOnlyMethodsException\n",
      " id:723 start_index:549867\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\TextUI\\\\XmlConfiguration\\\\RemoveCacheTokensAttribute\n",
      " id:723 start_index:578860\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\MatchBuilderNotFoundException\n",
      " id:723 start_index:551851\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\TextUI\\\\XmlConfiguration\\\\MoveWhitelistExcludesToCoverage\n",
      " id:723 start_index:578241\n",
      "\n",
      "723\n",
      "secret:PHPUnit\\\\Framework\\\\MockObject\\\\InvalidMethodNameException\n",
      " id:723 start_index:551393\n",
      "\n",
      "723\n"
     ]
    }
   ],
   "source": [
    "# Base64 detections for which Detector.is_gibberish() is False\n",
    "print(\"non gibberish secrets\")\n",
    "for detection in result:\n",
    "    sample_id = detection[\"id\"]  # not named `id`, which would shadow the builtin\n",
    "    for d in detection[\"secrets\"]:\n",
    "        is_base64 = d[\"type\"] == \"Base64 High Entropy String\"\n",
    "        # the gibberish check only runs for Base64 detections (short-circuit)\n",
    "        if is_base64 and not Detector.is_gibberish(d[\"secret_value\"]):\n",
    "            print(f\"secret:{d['secret_value']}\\n id:{sample_id} start_index:{d['start_index']}\\n\")\n",
    "            print(sample_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "e705c0af",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "otUseAddMethodsException.php',\n",
      "        'PHPUnit\\\\Framework\\\\MockObject\\\\CannotUseOnlyMethodsException' => __DIR__ . '/..' . '/phpunit/phpunit/src/Framework/MockObject/Exception/CannotUseOnlyMethodsException.php',\n",
      "        'PHPUnit\\\\Framework\\\\MockObject\\\\ClassAlreadyExistsException' => __DIR__ . '/..' . '/phpunit/phpunit/src/Framework/MockObject/Exception/ClassAlreadyExistsException.php',\n",
      "        'PHPUnit\\\\Framework\\\\MockObject\\\\ClassIsFinalException' => __DIR__ . '/..' . '/phpunit/phpunit/src/Framework/MockObject/Exception/ClassIsFinalException.php',\n",
      "        'PHPUnit\\\\Framework\\\\MockObject\\\\ConfigurableMethod' => __DIR__ . '/..' . '/phpunit/phpunit/src/Framework/MockObject/ConfigurableMethod.php',\n",
      "        'PHPUnit\\\\Framework\\\\MockObject\\\\ConfigurableMethodsAlreadyInitializedException' => _\n"
     ]
    }
   ],
   "source": [
    "# show the text around the detection at start_index 550227 in sample 723\n",
    "print(ds[723][\"content\"][550227-400:550227+400])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f8a18b9a",
   "metadata": {},
   "source": [
    "Conclusion: Gibberish detector can filter the false positives of this detector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "a9513c63",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gibberish secrets\n",
      "secret:ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n",
      " id:416 start_index:736691\n",
      "\n",
      "416\n",
      "secret:sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=\n",
      " id:710 start_index:4091\n",
      "\n",
      "710\n",
      "secret:sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM\n",
      " id:710 start_index:4509\n",
      "\n",
      "710\n",
      "secret:sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T\n",
      " id:710 start_index:359\n",
      "\n",
      "710\n",
      "secret:sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1\n",
      " id:710 start_index:4289\n",
      "\n",
      "710\n",
      "secret:sha384-fnmOCqbTlWIlj8LyTjo7mOUStjsKC4pOpQbqyi7RrhN7udi9RwhKkMHpvLbHG9Sr\n",
      " id:710 start_index:566\n",
      "\n",
      "710\n",
      "secret:sha384-IQsoLXl5PILFhosVNubq5LC7Qb9DXgDA9i+tQ8Zj3iwWAwPtgFTxbJ8NT4GN1R8p\n",
      " id:739 start_index:8678\n",
      "\n",
      "739\n",
      "secret:sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM\n",
      " id:739 start_index:8460\n",
      "\n",
      "739\n",
      "secret:sha384-AYmEC3Yw5cVb3ZcuHtOA93w35dYTsvhLPVnYs9eStHfGJvOvKxVfELGroGkvsg+p\n",
      " id:739 start_index:418\n",
      "\n",
      "739\n",
      "secret:sha384-gXt9imSW0VcJVHezoNQsP+TNrjYXoGcrqBZJpry9zJt8PCQjobwmhMGaDHTASo9N\n",
      " id:739 start_index:642\n",
      "\n",
      "739\n",
      "secret:sha384-cVKIPhGWiC2Al4u+LWgxfKTRIcfu0JTxR+EQDz/bgldoEyl4H0zUF0QKbrJ0EcQF\n",
      " id:739 start_index:8884\n",
      "\n",
      "739\n",
      "secret:sha512-nwpMzLYxfwDnu68Rt9PqLqgVtHkIJxEPrlu3PfTfLQKVgBAlTKDmim1JvCGNyNRtyvCx1nNIVBfYm8UZotWd4Q==\n",
      " id:761 start_index:713\n",
      "\n",
      "761\n",
      "secret:OTY0ZTkxM2UtYjA3OC0zNzM0LWJlYTAtODdiZDY4YzRjMGM4\n",
      " id:899 start_index:3516\n",
      "\n",
      "899\n"
     ]
    }
   ],
   "source": [
    "# Base64 detections for which Detector.is_gibberish() is True\n",
    "print(\"gibberish secrets\")\n",
    "for detection in result:\n",
    "    sample_id = detection[\"id\"]  # not named `id`, which would shadow the builtin\n",
    "    for d in detection[\"secrets\"]:\n",
    "        is_base64 = d[\"type\"] == \"Base64 High Entropy String\"\n",
    "        # the gibberish check only runs for Base64 detections (short-circuit)\n",
    "        if is_base64 and Detector.is_gibberish(d[\"secret_value\"]):\n",
    "            print(f\"secret:{d['secret_value']}\\n id:{sample_id} start_index:{d['start_index']}\\n\")\n",
    "            print(sample_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "724308e4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\"UTF-8\">\n",
      "    <meta content=\"width=device-width, initial-scale=1, maximum-scale=1, shrink-to-fit=no\" name=\"viewport\">\n",
      "    <title>Login</title>\n",
      "\n",
      "    <!-- General CSS Files -->\n",
      "    <link rel=\"stylesheet\" href=\"https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css\"\n",
      "        integrity=\"sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T\" crossorigin=\"anonymous\">\n",
      "    <link rel=\"stylesheet\" href=\"https://use.fontawesome.com/releases/v5.7.2/css/all.css\"\n",
      "        integrity=\"sha384-fnmOCqbTlWIlj8LyTjo7mOUStjsKC4pOpQbqyi7RrhN7udi9RwhKkMHpvLbHG9Sr\" crossorigin=\"anonymous\">\n",
      "\n",
      "    <!-- CSS Libraries -->\n",
      "    <link rel=\"stylesheet\" href=\"../node_modules/bootstrap-social/b\n"
     ]
    }
   ],
   "source": [
    "# show the text around the detection at start_index 359 in sample 710\n",
    "print(ds[710][\"content\"][359-300:359+400])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "265bc597",
   "metadata": {},
   "source": [
    "## Detection with selected plugins and filters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "3c931ed8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): scan_str_content below creates its own SecretsCollection;\n",
    "# this one is kept in case another cell still reads it.\n",
    "secrets = SecretsCollection()\n",
    "\n",
    "# Filters kept from the built-in list\n",
    "# (https://github.com/Yelp/detect-secrets/blob/master/docs/filters.md#built-in-filters),\n",
    "# the others were removed based on their goal. Each filter is listed once.\n",
    "filters = [\n",
    "    {'path': 'detect_secrets.filters.heuristic.is_sequential_string'},\n",
    "    {'path': 'detect_secrets.filters.heuristic.is_potential_uuid'},\n",
    "    {'path': 'detect_secrets.filters.heuristic.is_likely_id_string'},\n",
    "    {'path': 'detect_secrets.filters.heuristic.is_templated_secret'},\n",
    "]\n",
    "\n",
    "# Plugins kept after the analysis of the run with default settings.\n",
    "plugins = [\n",
    "    {'name': 'ArtifactoryDetector'},\n",
    "    {'name': 'AWSKeyDetector'},\n",
    "    # the entropy detectors esp Base64 need the gibberish detector on top\n",
    "    {'name': 'Base64HighEntropyString'},\n",
    "    {'name': 'HexHighEntropyString'},\n",
    "    {'name': 'AzureStorageKeyDetector'},\n",
    "    {'name': 'CloudantDetector'},\n",
    "    {'name': 'DiscordBotTokenDetector'},\n",
    "    {'name': 'GitHubTokenDetector'},\n",
    "    {'name': 'IbmCloudIamDetector'},\n",
    "    {'name': 'IbmCosHmacDetector'},\n",
    "    {'name': 'JwtTokenDetector'},\n",
    "    {'name': 'MailchimpDetector'},\n",
    "    {'name': 'NpmDetector'},\n",
    "    {'name': 'SendGridDetector'},\n",
    "    {'name': 'SlackDetector'},\n",
    "    {'name': 'SoftlayerDetector'},\n",
    "    {'name': 'StripeDetector'},\n",
    "    {'name': 'TwilioKeyDetector'},\n",
    "    # the 3 plugins below are disabled on purpose (keyword based or false positives)\n",
    "    # {'name': 'BasicAuthDetector'},\n",
    "    # {'name': 'KeywordDetector'},\n",
    "    # {'name': 'PrivateKeyDetector'},\n",
    "]\n",
    "\n",
    "def scan_str_content(content, suffix=\".txt\"):\n",
    "    \"\"\"Detect secret keys in content with selected plugins and filters\n",
    "\n",
    "    The scan works on files, so content is written to a temporary file\n",
    "    which is deleted after the scan, also when the write or the scan raises.\n",
    "\n",
    "    Args:\n",
    "        content (str): content to scan\n",
    "        suffix (str): suffix of the temporary file\n",
    "    Returns:\n",
    "        list: one dict per secret found, with the keys type, secret_value,\n",
    "            start_index and end_index; the indexes delimit the first\n",
    "            occurrence of secret_value in content (end_index is exclusive)\n",
    "    Raises:\n",
    "        ValueError: if a reported secret_value is not a substring of content\"\"\"\n",
    "\n",
    "    fp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False, mode='w')\n",
    "    try:\n",
    "        with fp:\n",
    "            fp.write(content)\n",
    "        secrets = SecretsCollection()\n",
    "        with transient_settings({'plugins_used': plugins, 'filters_used': filters}):\n",
    "            secrets.scan_file(fp.name)\n",
    "    finally:\n",
    "        # delete=False keeps the file after close(), so remove it explicitly\n",
    "        os.unlink(fp.name)\n",
    "    # only one file was scanned, so only the first entry of secrets.data is read\n",
    "    secrets_set = list(secrets.data.values())\n",
    "    result = []\n",
    "    if secrets_set:\n",
    "        for secret in secrets_set[0]:\n",
    "            start_index = content.index(secret.secret_value)\n",
    "            result.append({\n",
    "                'type': secret.type,\n",
    "                'secret_value': secret.secret_value,\n",
    "                'start_index': start_index,\n",
    "                'end_index': start_index + len(secret.secret_value),\n",
    "            })\n",
    "    return result\n",
    "\n",
    "\n",
    "def scan_secrets_batch(examples):\n",
    "    \"\"\"Scan a batch of examples from a dataset for secret keys\n",
    "    This returns four columns, with one value per example:\n",
    "    - secrets: (str) str() of the list of secret values, empty string if none\n",
    "    - types: (str) str() of the list of detector types, empty string if none\n",
    "    - limits: (str) str() of the list of (start_index, end_index), empty string if none\n",
    "    - has_secrets: (bool) whether the example contains a secret\"\"\"\n",
    "\n",
    "    list_secrets = []\n",
    "    list_types = []\n",
    "    list_limits = []\n",
    "    has_secrets = []\n",
    "    for text in examples[\"content\"]:\n",
    "        output = scan_str_content(text, suffix=\".txt\")\n",
    "        if output:\n",
    "            # to add this in datasets we need the same kind of value in each row,\n",
    "            # so the per-example lists are saved as str instead of list\n",
    "            list_secrets.append(str([e['secret_value'] for e in output]))\n",
    "            list_types.append(str([e['type'] for e in output]))\n",
    "            list_limits.append(str([(e['start_index'], e['end_index']) for e in output]))\n",
    "        else:\n",
    "            list_secrets.append(\"\")\n",
    "            list_types.append(\"\")\n",
    "            list_limits.append(\"\")\n",
    "        has_secrets.append(bool(output))\n",
    "    return {\n",
    "        \"secrets\": list_secrets,\n",
    "        \"types\": list_types,\n",
    "        # spans of the values listed in \"secrets\", in the same order\n",
    "        \"limits\": list_limits,\n",
    "        \"has_secrets\": has_secrets,\n",
    "    }\n",
    "\n",
    "\n",
    "def scan_secrets_batch_viz(examples):\n",
    "    \"\"\"Scan every example and keep the ones with detections, for inspection\n",
    "    Returns:\n",
    "        list: one dict per example with at least one detection, with the keys\n",
    "            id (position of the example in examples[\"content\"]) and\n",
    "            secrets (the output of scan_str_content for this example)\"\"\"\n",
    "    scanned = (\n",
    "        (idx, scan_str_content(sample, suffix=\".txt\"))\n",
    "        for idx, sample in enumerate(examples[\"content\"])\n",
    "    )\n",
    "    return [{\"id\": idx, \"secrets\": found} for idx, found in scanned if found]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "b6314b47",
   "metadata": {},
   "outputs": [],
   "source": [
    "# scan the samples one after the other (no multiprocessing here), takes a few seconds\n",
    "result_custom = scan_secrets_batch_viz(ds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "8d18bb70",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "total number of detected secrets in 20 files (among 1k) is: 124\n"
     ]
    }
   ],
   "source": [
    "# number of files with at least one detection, and detections over all files\n",
    "n_files_custom = len(result_custom)\n",
    "n_secrets_custom = sum(len(e['secrets']) for e in result_custom)\n",
    "print(f\"total number of detected secrets in {n_files_custom} files (among 1k) is: {n_secrets_custom}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "84c3d3fc",
   "metadata": {},
   "source": [
    "This is 8 files fewer than before, and 10 fewer secrets."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "afa7064a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'id': 12,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '3800a9169891c0554775b12cbf5d79f6eb50ccb5f95630536a4cecd7a18aa34b',\n",
       "    'start_index': 2316,\n",
       "    'end_index': 2380},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '75e5849b1a27d71e74de1390a4fc81c38b4ed8ce24d4efb2c9a5807d0e82106c',\n",
       "    'start_index': 2449,\n",
       "    'end_index': 2513},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'be6bbdf83a789fd2b7e5ac8e2954f510e92115bb9e1c84591f6adb4055a3b845',\n",
       "    'start_index': 2080,\n",
       "    'end_index': 2144}]},\n",
       " {'id': 25,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'bf2c7ce40b04ae811af714deb512510cc2c17b9ab9d6ddcf49fe4487eea7af3d',\n",
       "    'start_index': 194,\n",
       "    'end_index': 258},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '56c932549852cddcfafdab3820b0200c7742675be92179e59e6215b340e26467',\n",
       "    'start_index': 545,\n",
       "    'end_index': 609}]},\n",
       " {'id': 34,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '7ab18906739e4662ac01e69f5ebb7352',\n",
       "    'start_index': 1168,\n",
       "    'end_index': 1200}]},\n",
       " {'id': 64,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '17470674346319559612580175475351973007892815102',\n",
       "    'start_index': 7911,\n",
       "    'end_index': 7958}]},\n",
       " {'id': 84,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f',\n",
       "    'start_index': 6655,\n",
       "    'end_index': 6719}]},\n",
       " {'id': 320,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '49c95db1e470fed04496d801c9d8fbb78155d2c7f855232c918823d2c17d0cf6',\n",
       "    'start_index': 7788,\n",
       "    'end_index': 7852},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '0100000090f0a9f110702f808219ebea1173056042a714bad51b916cb6800000000000005275289558f51c9966699404ae2294730c3c9f9bda53523ce50e9b95e558da2fdb261b4d4c86041b1ab1bf930901000000010000000000000000000000000000000000000000000000000000000000000000ffffffff07044c86041b0146ffffffff0100f2052a01000000434104e18f7afbe4721580e81e8414fc8c24d7cfacf254bb5c7b949450c3e997c2dc1242487a8169507b631eb3771f2b425483fb13102c4eb5d858eef260fe70fbfae0ac00000000010000000196608ccbafa16abada902780da4dc35dafd7af05fa0da08cf833575f8cf9e836000000004a493046022100dab24889213caf43ae6adc41cf1c9396c08240c199f5225acf45416330fd7dbd022100fe37900e0644bf574493a07fc5edba06dbc07c311b947520c2d514bc5725dcb401ffffffff0100f2052a010000001976a914f15d1921f52e4007b146dfa60f369ed2fc393ce288ac000000000100000001fb766c1288458c2bafcfec81e48b24d98ec706de6b8af7c4e3c29419bfacb56d000000008c493046022100f268ba165ce0ad2e6d93f089cfcd3785de5c963bb5ea6b8c1b23f1ce3e517b9f022100da7c0f21adc6c401887f2bfd1922f11d76159cbc597fbd756a23dcbb00f4d7290141042b4e8625a96127826915a5b109852636ad0da753c9e1d5606a50480cd0c40f1f8b8d898235e571fe9357d9ec842bc4bba1827daaf4de06d71844d0057707966affffffff0280969800000000001976a9146963907531db72d0ed1a0cfb471ccb63923446f388ac80d6e34c000000001976a914f0688ba1c0d1ce182c7af6741e02658c7d4dfcd388ac000000000100000002c40297f730dd7b5a99567eb8d27b78758f607507c52292d02d4031895b52f2ff010000008b483045022100f7edfd4b0aac404e5bab4fd3889e0c6c41aa8d0e6fa122316f68eddd0a65013902205b09cc8b2d56e1cd1f7f2fafd60a129ed94504c4ac7bdc67b56fe67512658b3e014104732012cb962afa90d31b25d8fb0e32c94e513ab7a17805c14ca4c3423e18b4fb5d0e676841733cb83abaf975845c9f6f2a8097b7d04f4908b18368d6fc2d68ecffffffffca5065ff9617cbcba45eb23726df6498a9b9cafed4f54cbab9d227b0035ddefb000000008a473044022068010362a13c7f9919fa832b2dee4e788f61f6f5d344a7c2a0da6ae740605658022006d1af525b9a14a35c003b78b72bd59738cd676f845d1ff3fc25049e01003614014104732012cb962afa90d31b25d8fb0e32c94e513ab7a17805c14ca4c3423e18b4fb5d0e676841733cb83abaf975845c9f6f2a8097b7d04f4908b
18368d6fc2d68ecffffffff01001ec4110200000043410469ab4181eceb28985b9b4e895c13fa5e68d85761b7eee311db5addef76fa8621865134a221bd01f28ec9999ee3e021e60766e9d1f3458c115fb28650605f11c9ac000000000100000001cdaf2f758e91c514655e2dc50633d1e4c84989f8aa90a0dbc883f0d23ed5c2fa010000008b48304502207ab51be6f12a1962ba0aaaf24a20e0b69b27a94fac5adf45aa7d2d18ffd9236102210086ae728b370e5329eead9accd880d0cb070aea0c96255fae6c4f1ddcce1fd56e014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff02404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac002d3101000000001976a9141befba0cdc1ad56529371864d9f6cb042faa06b588ac000000000100000001b4a47603e71b61bc3326efd90111bf02d2f549b067f4c4a8fa183b57a0f800cb010000008a4730440220177c37f9a505c3f1a1f0ce2da777c339bd8339ffa02c7cb41f0a5804f473c9230220585b25a2ee80eb59292e52b987dad92acb0c64eced92ed9ee105ad153cdb12d001410443bd44f683467e549dae7d20d1d79cbdb6df985c6e9c029c8d0c6cb46cc1a4d3cf7923c5021b27f7a0b562ada113bc85d5fda5a1b41e87fe6e8802817cf69996ffffffff0280651406000000001976a9145505614859643ab7b547cd7f1f5e7e2a12322d3788ac00aa0271000000001976a914ea4720a7a52fc166c55ff2298e07baf70ae67e1b88ac00000000010000000586c62cd602d219bb60edb14a3e204de0705176f9022fe49a538054fb14abb49e010000008c493046022100f2bc2aba2534becbdf062eb993853a42bbbc282083d0daf9b4b585bd401aa8c9022100b1d7fd7ee0b95600db8535bbf331b19eed8d961f7a8e54159c53675d5f69df8c014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff03ad0e58ccdac3df9dc28a218bcf6f1997b0a93306faaa4b3a28ae83447b2179010000008b483045022100be12b2937179da88599e27bb31c3525097a07cdb52422d165b3ca2f2020ffcf702200971b51f853a53d644ebae9ec8f3512e442b1bcb6c315a5b491d119d10624c83014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff2acfcab629bbc8685792603762c921580030ba144af553d271716
a95089e107b010000008b483045022100fa579a840ac258871365dd48cd7552f96c8eea69bd00d84f05b283a0dab311e102207e3c0ee9234814cfbb1b659b83671618f45abc1326b9edcc77d552a4f2a805c0014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffffdcdc6023bbc9944a658ddc588e61eacb737ddf0a3cd24f113b5a8634c517fcd2000000008b4830450221008d6df731df5d32267954bd7d2dda2302b74c6c2a6aa5c0ca64ecbabc1af03c75022010e55c571d65da7701ae2da1956c442df81bbf076cdbac25133f99d98a9ed34c014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffffe15557cd5ce258f479dfd6dc6514edf6d7ed5b21fcfa4a038fd69f06b83ac76e010000008b483045022023b3e0ab071eb11de2eb1cc3a67261b866f86bf6867d4558165f7c8c8aca2d86022100dc6e1f53a91de3efe8f63512850811f26284b62f850c70ca73ed5de8771fb451014104462e76fd4067b3a0aa42070082dcb0bf2f388b6495cf33d789904f07d0f55c40fbd4b82963c69b3dc31895d0c772c812b1d5fbcade15312ef1c0e8ebbb12dcd4ffffffff01404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac00000000010000000166d7577163c932b4f9690ca6a80b6e4eb001f0a2fa9023df5595602aae96ed8d000000008a4730440220262b42546302dfb654a229cefc86432b89628ff259dc87edd1154535b16a67e102207b4634c020a97c3e7bbd0d4d19da6aa2269ad9dded4026e896b213d73ca4b63f014104979b82d02226b3a4597523845754d44f13639e3bf2df5e82c6aab2bdc79687368b01b1ab8b19875ae3c90d661a3d0a33161dab29934edeb36aa01976be3baf8affffffff02404b4c00000000001976a9144854e695a02af0aeacb823ccbc272134561e0a1688ac40420f00000000001976a914abee93376d6b37b5c2940655a6fcaf1c8e74237988ac0000000001000000014e3f8ef2e91349a9059cb4f01e54ab2597c1387161d3da89919f7ea6acdbb371010000008c49304602210081f3183471a5ca22307c0800226f3ef9c353069e0773ac76bb580654d56aa523022100d4c56465bdc069060846f4fbf2f6b20520b2a80b08b168b31e66ddb9c694e240014104976c79848e18251612f8940875b2b08d06e6dc73b9840e8860c066b7e87432c477e9a59a453e71e6d76d5fe34058b800a098fc1740ce3012e8fc8a00c96af966ffffffff02c0e1e40
0000000001976a9144134e75a6fcb6042034aab5e18570cf1f844f54788ac404b4c00000000001976a9142b6ba7c9d796b75eef7942fc9288edd37c32f5c388ac00000000',\n",
       "    'start_index': 9759,\n",
       "    'end_index': 15867}]},\n",
       " {'id': 370,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '76a914d63b766cd342e6f0f7390dd454065e4bbea26b1b88ac',\n",
       "    'start_index': 39483,\n",
       "    'end_index': 39533},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '000102030405060708090A0B0C0D0E0FF0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF',\n",
       "    'start_index': 3685,\n",
       "    'end_index': 3749},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '9ec8177ca0a4f7aa21ec88a324f236a4d1dce6c610812a90e16febef4603a438',\n",
       "    'start_index': 39397,\n",
       "    'end_index': 39461}]},\n",
       " {'id': 416,\n",
       "  'secrets': [{'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=',\n",
       "    'start_index': 736691,\n",
       "    'end_index': 736756}]},\n",
       " {'id': 445,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '5974787496DFA27A4B7FE6023473FAE930EA41DC',\n",
       "    'start_index': 918,\n",
       "    'end_index': 958},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '1128ED677399F969E25D9453320B85EF3D3BA35A',\n",
       "    'start_index': 40,\n",
       "    'end_index': 80},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '2904137A030AE2370A8CD3E068078A1D59A4F229',\n",
       "    'start_index': 749,\n",
       "    'end_index': 789}]},\n",
       " {'id': 548,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '1dbd57365a500dc852bb214404100124bb361a19618c7734fcf28cb932bd9630',\n",
       "    'start_index': 1102,\n",
       "    'end_index': 1166}]},\n",
       " {'id': 588,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '433080ff80d0d52d7f8bfffff47f00807f44f680000949b800007f7f7ff1017f',\n",
       "    'start_index': 1612,\n",
       "    'end_index': 1676},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '02a7955281885bf0f0ca4a48678848cad8dc5b328ce8bc1d4481d041c98e891ff3',\n",
       "    'start_index': 2103,\n",
       "    'end_index': 2169}]},\n",
       " {'id': 710,\n",
       "  'secrets': [{'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T',\n",
       "    'start_index': 359,\n",
       "    'end_index': 430},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=',\n",
       "    'start_index': 4091,\n",
       "    'end_index': 4142},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1',\n",
       "    'start_index': 4289,\n",
       "    'end_index': 4360},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM',\n",
       "    'start_index': 4509,\n",
       "    'end_index': 4580},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-fnmOCqbTlWIlj8LyTjo7mOUStjsKC4pOpQbqyi7RrhN7udi9RwhKkMHpvLbHG9Sr',\n",
       "    'start_index': 566,\n",
       "    'end_index': 637}]},\n",
       " {'id': 723,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '667aeda72477189d0494fecd327c3641',\n",
       "    'start_index': 958,\n",
       "    'end_index': 990},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '377b22b161c09ed6e5152de788ca020a',\n",
       "    'start_index': 6387,\n",
       "    'end_index': 6419},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'b6b991a57620e2fb6b2f66f03fe9ddc2',\n",
       "    'start_index': 1735,\n",
       "    'end_index': 1767},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MethodParametersAlreadyConfiguredException',\n",
       "    'start_index': 553209,\n",
       "    'end_index': 553283},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '6e3fae29631ef280660b3cdad06f25a8',\n",
       "    'start_index': 519,\n",
       "    'end_index': 551},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\ReturnValueNotConfiguredException',\n",
       "    'start_index': 554885,\n",
       "    'end_index': 554950},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '0d59ee240a4cd96ddbb4ff164fccea4d',\n",
       "    'start_index': 1179,\n",
       "    'end_index': 1211},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'b067bc7112e384b61c701452d53a14a8',\n",
       "    'start_index': 1954,\n",
       "    'end_index': 1986},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\OriginalConstructorInvocationRequiredException',\n",
       "    'start_index': 554507,\n",
       "    'end_index': 554585},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '9cdd7b9056abc3081735233ba9dd9c7f',\n",
       "    'start_index': 5734,\n",
       "    'end_index': 5766},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '017b24472353920ed42bb364f7653c43',\n",
       "    'start_index': 6275,\n",
       "    'end_index': 6307},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'd154b49fab8e4da34fb553a2d644918c',\n",
       "    'start_index': 4384,\n",
       "    'end_index': 4416},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MethodNameNotConfiguredException',\n",
       "    'start_index': 553021,\n",
       "    'end_index': 553085},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '8a9dc1de0ca7e01f3e08231539562f61',\n",
       "    'start_index': 2167,\n",
       "    'end_index': 2199},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\InvalidCoversTargetException',\n",
       "    'start_index': 547829,\n",
       "    'end_index': 547877},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '751a5a3f463e4be759be31748b61737c',\n",
       "    'start_index': 5103,\n",
       "    'end_index': 5135},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\BadMethodCallException',\n",
       "    'start_index': 548583,\n",
       "    'end_index': 548637},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'da94ac5d3ca7d2dbab84ce561ce72bfd',\n",
       "    'start_index': 3145,\n",
       "    'end_index': 3177},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\ComparisonMethodDoesNotDeclareBoolReturnTypeException',\n",
       "    'start_index': 537090,\n",
       "    'end_index': 537163},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '6124b4c8570aa390c21fafd04a26c69f',\n",
       "    'start_index': 5842,\n",
       "    'end_index': 5874},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '3d97c8dcdfba8cb85d3b34f116bb248b',\n",
       "    'start_index': 3283,\n",
       "    'end_index': 3315},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '801c31d8ed748cfa537fa45402288c95',\n",
       "    'start_index': 2384,\n",
       "    'end_index': 2416},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '58571171fd5812e6e447dce228f52f4d',\n",
       "    'start_index': 5613,\n",
       "    'end_index': 5645},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '290dd4ba42f11019134caca05dbefe3f',\n",
       "    'start_index': 6070,\n",
       "    'end_index': 6102},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '5c70426340c07411ceee79728a2304a8',\n",
       "    'start_index': 2907,\n",
       "    'end_index': 2939},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\TextUI\\\\\\\\XmlConfiguration\\\\\\\\RemoveCacheTokensAttribute',\n",
       "    'start_index': 578860,\n",
       "    'end_index': 578921},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '7b11c4dc42b3b3023073cb14e519683c',\n",
       "    'start_index': 1284,\n",
       "    'end_index': 1316},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'ec07570ca5a812141189b1fa81503674',\n",
       "    'start_index': 292,\n",
       "    'end_index': 324},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'd919fc9d5ad52cfb7f322f7fe36458ab',\n",
       "    'start_index': 4099,\n",
       "    'end_index': 4131},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'e39a8b23c42d4e1452234d762b03835a',\n",
       "    'start_index': 2809,\n",
       "    'end_index': 2841},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MethodCannotBeConfiguredException',\n",
       "    'start_index': 552481,\n",
       "    'end_index': 552546},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'c7a3c339e7e14b60e06a2d7fcce9476b',\n",
       "    'start_index': 5367,\n",
       "    'end_index': 5399},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '786bf90caabc9e09b6ad4cc5ca8f0e30',\n",
       "    'start_index': 4959,\n",
       "    'end_index': 4991},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\ConfigurableMethod',\n",
       "    'start_index': 550393,\n",
       "    'end_index': 550443},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '2c102faa651ef8ea5874edb585946bce',\n",
       "    'start_index': 2269,\n",
       "    'end_index': 2301},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\InvalidMethodNameException',\n",
       "    'start_index': 551393,\n",
       "    'end_index': 551451},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\ActualValueIsNotAnObjectException',\n",
       "    'start_index': 536333,\n",
       "    'end_index': 536386},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'b6c2870932b0250c10334a86dcb33c7f',\n",
       "    'start_index': 3830,\n",
       "    'end_index': 3862},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\ClassIsFinalException',\n",
       "    'start_index': 550227,\n",
       "    'end_index': 550280},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'e69f7f6ee287b969198c3c9d6777bd38',\n",
       "    'start_index': 630,\n",
       "    'end_index': 662},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\CannotUseOnlyMethodsException',\n",
       "    'start_index': 549867,\n",
       "    'end_index': 549928},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'e397f74f8af3b1e56166a6e99f216ee7',\n",
       "    'start_index': 4239,\n",
       "    'end_index': 4271},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'a1105708a18b76903365ca1c4aa61b02',\n",
       "    'start_index': 2575,\n",
       "    'end_index': 2607},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '538ca81a9a966a6716601ecf48f4eaef',\n",
       "    'start_index': 2480,\n",
       "    'end_index': 2512},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'd02cf21124526632320d6f20b1bbf905',\n",
       "    'start_index': 3967,\n",
       "    'end_index': 3999},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'd59fbae42019aedf227094ac49a46f50',\n",
       "    'start_index': 3567,\n",
       "    'end_index': 3599},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '37a3dc5111fe8f707ab4c132ef1dbc62',\n",
       "    'start_index': 1842,\n",
       "    'end_index': 1874},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '320cde22f66dd4f5d3fd621d3e88b98f',\n",
       "    'start_index': 1074,\n",
       "    'end_index': 1106},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '573214ce1ef6f18100c43e366703d73c',\n",
       "    'start_index': 5955,\n",
       "    'end_index': 5987},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'de95e0ac670b27c84ef8c5ac41fc1b34',\n",
       "    'start_index': 3701,\n",
       "    'end_index': 3733},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'cc8e14526dc240491e17a838cb78508c',\n",
       "    'start_index': 4823,\n",
       "    'end_index': 4855},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\ConfigurableMethodsAlreadyInitializedException',\n",
       "    'start_index': 550543,\n",
       "    'end_index': 550621},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MatchBuilderNotFoundException',\n",
       "    'start_index': 551851,\n",
       "    'end_index': 551912},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MethodNameAlreadyConfiguredException',\n",
       "    'start_index': 552671,\n",
       "    'end_index': 552739},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\TextUI\\\\\\\\XmlConfiguration\\\\\\\\MoveWhitelistExcludesToCoverage',\n",
       "    'start_index': 578241,\n",
       "    'end_index': 578307},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\ClassAlreadyExistsException',\n",
       "    'start_index': 550049,\n",
       "    'end_index': 550108},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\CannotUseAddMethodsException',\n",
       "    'start_index': 549687,\n",
       "    'end_index': 549747},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'e6f3bc6883e449ab367280b34158c05b',\n",
       "    'start_index': 3420,\n",
       "    'end_index': 3452},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '265b4faa2b3a9766332744949e83bf97',\n",
       "    'start_index': 5242,\n",
       "    'end_index': 5274},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\NoChildTestSuiteException',\n",
       "    'start_index': 559526,\n",
       "    'end_index': 559571},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\MatcherAlreadyRegisteredException',\n",
       "    'start_index': 552161,\n",
       "    'end_index': 552226},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'def43f6c87e4f8dfd0c9e1b1bab14fe8',\n",
       "    'start_index': 2062,\n",
       "    'end_index': 2094},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '25072dd6e2470089de65ae7bf11d3109',\n",
       "    'start_index': 745,\n",
       "    'end_index': 777},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'a0edc8309cc5e1d60e3047b5df6b7052',\n",
       "    'start_index': 1398,\n",
       "    'end_index': 1430},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '07d7f1a47144818725fd8d91a907ac57',\n",
       "    'start_index': 3012,\n",
       "    'end_index': 3044},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'f598d06aa772fa33d905e87be6398fb1',\n",
       "    'start_index': 850,\n",
       "    'end_index': 882},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'a4a119a56e50fbb293281d9a48007e0e',\n",
       "    'start_index': 187,\n",
       "    'end_index': 219},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '8825ede83f2f289127722d4e842cf7e8',\n",
       "    'start_index': 1622,\n",
       "    'end_index': 1654},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\TextUI\\\\\\\\XmlConfiguration\\\\\\\\UpdateSchemaLocationTo93',\n",
       "    'start_index': 580915,\n",
       "    'end_index': 580974},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'f0906e6318348a765ffb6eb24e0d0938',\n",
       "    'start_index': 5489,\n",
       "    'end_index': 5521},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '0e6d7bf4a5811bfa5cf40c5ccd6fae6a',\n",
       "    'start_index': 411,\n",
       "    'end_index': 443},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'b0b88a3b89caae681462c58ff19a7059',\n",
       "    'start_index': 4682,\n",
       "    'end_index': 4714},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\Rule\\\\\\\\InvokedAtLeastCount',\n",
       "    'start_index': 555856,\n",
       "    'end_index': 555913},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '7e9bd612cc444b3eed788ebbe46263a0',\n",
       "    'start_index': 2687,\n",
       "    'end_index': 2719},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\IncompatibleReturnValueException',\n",
       "    'start_index': 551205,\n",
       "    'end_index': 551269},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'c964ee0ededf28c96ebd9db5099ef910',\n",
       "    'start_index': 1508,\n",
       "    'end_index': 1540},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'ed962a97bd972bc82007176b647d4e36',\n",
       "    'start_index': 6175,\n",
       "    'end_index': 6207},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'PHPUnit\\\\\\\\Framework\\\\\\\\MockObject\\\\\\\\SoapExtensionNotAvailableException',\n",
       "    'start_index': 557089,\n",
       "    'end_index': 557155},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '9d3db23ca418094bcf0b641a0c9559ed',\n",
       "    'start_index': 4528,\n",
       "    'end_index': 4560}]},\n",
       " {'id': 739,\n",
       "  'secrets': [{'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-cVKIPhGWiC2Al4u+LWgxfKTRIcfu0JTxR+EQDz/bgldoEyl4H0zUF0QKbrJ0EcQF',\n",
       "    'start_index': 8884,\n",
       "    'end_index': 8955},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-gXt9imSW0VcJVHezoNQsP+TNrjYXoGcrqBZJpry9zJt8PCQjobwmhMGaDHTASo9N',\n",
       "    'start_index': 642,\n",
       "    'end_index': 713},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-IQsoLXl5PILFhosVNubq5LC7Qb9DXgDA9i+tQ8Zj3iwWAwPtgFTxbJ8NT4GN1R8p',\n",
       "    'start_index': 8678,\n",
       "    'end_index': 8749},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM',\n",
       "    'start_index': 8460,\n",
       "    'end_index': 8531},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha384-AYmEC3Yw5cVb3ZcuHtOA93w35dYTsvhLPVnYs9eStHfGJvOvKxVfELGroGkvsg+p',\n",
       "    'start_index': 418,\n",
       "    'end_index': 489}]},\n",
       " {'id': 761,\n",
       "  'secrets': [{'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'sha512-nwpMzLYxfwDnu68Rt9PqLqgVtHkIJxEPrlu3PfTfLQKVgBAlTKDmim1JvCGNyNRtyvCx1nNIVBfYm8UZotWd4Q==',\n",
       "    'start_index': 713,\n",
       "    'end_index': 808}]},\n",
       " {'id': 899,\n",
       "  'secrets': [{'type': 'JSON Web Token',\n",
       "    'secret_value': 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiJ0a2VlbCIsImV4cCI6MTY0NDQwMDAzMiwic3ViIjoidXNyLTMzNzM3OTQ1YzJiNzE4ZGI0YzMwOWQ2MzNkMmYifQ.',\n",
       "    'start_index': 1555,\n",
       "    'end_index': 1691},\n",
       "   {'type': 'Base64 High Entropy String',\n",
       "    'secret_value': 'OTY0ZTkxM2UtYjA3OC0zNzM0LWJlYTAtODdiZDY4YzRjMGM4',\n",
       "    'start_index': 3516,\n",
       "    'end_index': 3564},\n",
       "   {'type': 'JSON Web Token',\n",
       "    'secret_value': 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiJ0a2VlbCIsImV4cCI6MTY0NDQwNDAzMSwic3ViIjoidXNyLTMzNzM3OTQ1YzJiNzE4ZGI0YzMwOWQ2MzNkMmYifQ.',\n",
       "    'start_index': 4620,\n",
       "    'end_index': 4756},\n",
       "   {'type': 'JSON Web Token',\n",
       "    'secret_value': 'eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiJ0a2VlbCIsImV4cCI6MTY0NDgyMzA2Miwic3ViIjoidXNyLTMzNzM3OTQ1YzJiNzE4ZGI0YzMwOWQ2MzNkMmYifQ.',\n",
       "    'start_index': 6746,\n",
       "    'end_index': 6882}]},\n",
       " {'id': 960,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '098a3b74c65c030729fad809210f1e31d96a2295610376989134be981f4fcc30',\n",
       "    'start_index': 461,\n",
       "    'end_index': 525},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '2998837ff866b54bdf6cdaf923417cc98a2dbe7fa831142c092ebb724694fa42',\n",
       "    'start_index': 737,\n",
       "    'end_index': 801},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '17e976657779458a4b7aebd420a0916fa7f2746e6530602ff13a55cf81469462',\n",
       "    'start_index': 372,\n",
       "    'end_index': 436},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'eb75dda827c656a33be6e60f18b3943c4dd4252205e557ec95d1cf44df8e3a35',\n",
       "    'start_index': 249,\n",
       "    'end_index': 313},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'd6edae2002e2df530bd14e8cb27eb6dce1a29fe15b5ec614d9c3b7610fe00d96',\n",
       "    'start_index': 555,\n",
       "    'end_index': 619},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': '51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274',\n",
       "    'start_index': 1027,\n",
       "    'end_index': 1091},\n",
       "   {'type': 'Hex High Entropy String',\n",
       "    'secret_value': 'aebb1a098a77f6e9477c5f426b363895d2f0cc77c46a3d84c871a9fab2f08d54',\n",
       "    'start_index': 644,\n",
       "    'end_index': 708}]},\n",
       " {'id': 975,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '71f05afc51e3d9b03376b2f98fd452d3a274d595',\n",
       "    'start_index': 171,\n",
       "    'end_index': 211}]},\n",
       " {'id': 978,\n",
       "  'secrets': [{'type': 'Hex High Entropy String',\n",
       "    'secret_value': '7f6c8dc83d77134b5a3a1c53f1202b395b04482b',\n",
       "    'start_index': 7737,\n",
       "    'end_index': 7777}]},\n",
       " {'id': 986,\n",
       "  'secrets': [{'type': 'Slack Token',\n",
       "    'secret_value': 'xoxb-3242497751-XHFJhTNa87987dhADff7873A',\n",
       "    'start_index': 934,\n",
       "    'end_index': 974}]}]"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_custom"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "id": "28f48982",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "\t'repo_type' : 'archive',\n",
      "\t'download_locations' : [\n",
      "\t\t#UPDATECHECKS: http://fftw.org/download.html\n",
      "\t\t#{ \"url\" : \"http://fftw.org/fftw-3.3.9.tar.gz\", \"hashes\" : [ { \"type\" : \"sha256\", \"sum\" : \"bf2c7ce40b04ae811af714deb512510cc2c17b9ab9d6ddcf49fe4487eea7af3d\" }, ], },\n",
      "\t\t#{ \"url\" : \"https://fossies.org/linux/misc/fftw-3.3.9.tar.gz\", \"hashes\" : [ { \"type\" : \"sha256\", \"sum\" : \"bf2c7ce40b04ae811af714deb512510cc2c17b9ab9d6ddcf49fe4487eea7af3d\" }, ], },\n",
      "\t\t{ \"url\" : \"http://fftw.org/fftw-3.3.10.tar.gz\", \"hashes\" : [ { \"type\" : \"sha256\", \"sum\" : \"56c932549852cddcfafdab3820b0200c7742675be92179e59\n"
     ]
    }
   ],
   "source": [
    "# Show the first 194 + 400 = 594 characters of sample 25's content.\n",
    "print(ds[25][\"content\"][:194 + 400])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "id": "ea5760af",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "non gibberish secrets\n",
      "Number non gibberish secrets: 26\n",
      "Number gibberish secrets: 13\n"
     ]
    }
   ],
   "source": [
    "# Tally the Base64 high-entropy detections in result_custom according to\n",
    "# whether Detector.is_gibberish flags their secret value.\n",
    "print(\"non gibberish secrets\")\n",
    "count, gibberish = 0, 0\n",
    "for detection in result_custom:\n",
    "    for d in detection[\"secrets\"]:\n",
    "        # other detector types are not part of this comparison\n",
    "        if d[\"type\"] != \"Base64 High Entropy String\":\n",
    "            continue\n",
    "        if Detector.is_gibberish(d[\"secret_value\"]):\n",
    "            gibberish += 1\n",
    "        else:\n",
    "            count += 1\n",
    "print(f\"Number non gibberish secrets: {count}\")\n",
    "print(f\"Number gibberish secrets: {gibberish}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f5562da0",
   "metadata": {},
   "source": [
    "### Impact of changing the value of limit on Base64 entropy detector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "id": "c39e3cf3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): this module-level collection is not used by the functions\n",
    "# in this cell (scan_str_content creates its own); kept in case other cells read it.\n",
    "secrets = SecretsCollection()\n",
    "# Value passed as 'limit' to the Base64HighEntropyString plugin below.\n",
    "LIMIT = 5\n",
    "filters = [\n",
    "    # some filters from [original list](https://github.com/Yelp/detect-secrets/blob/master/docs/filters.md#built-in-filters)\n",
    "    # were removed based on their goal\n",
    "    {'path': 'detect_secrets.filters.heuristic.is_sequential_string'},\n",
    "    {'path': 'detect_secrets.filters.heuristic.is_potential_uuid'},\n",
    "    {'path': 'detect_secrets.filters.heuristic.is_likely_id_string'},\n",
    "    {'path': 'detect_secrets.filters.heuristic.is_templated_secret'},\n",
    "]\n",
    "# Only the Base64 entropy plugin is enabled in this experiment.\n",
    "plugins = [\n",
    "    {'name': 'Base64HighEntropyString', 'limit': LIMIT},\n",
    "]\n",
    "\n",
    "def scan_str_content(content, suffix=\".txt\"):\n",
    "    \"\"\"Detect secret keys in content with selected plugins and filters\n",
    "\n",
    "    The content is written to a temporary file, scanned with\n",
    "    SecretsCollection.scan_file under the module-level plugins and filters,\n",
    "    and the temporary file is removed even if writing or scanning fails.\n",
    "\n",
    "    Args:\n",
    "        content (str): content to scan\n",
    "        suffix (str): suffix of the file\n",
    "    Returns:\n",
    "        list: list of secrets found, one dict per secret with keys 'type',\n",
    "            'secret_value', 'start_index' and 'end_index'; the indices locate\n",
    "            the first occurrence of the secret value in content\n",
    "    Raises:\n",
    "        ValueError: if a reported secret value cannot be found in content\"\"\"\n",
    "\n",
    "    fp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False, mode='w')\n",
    "    try:\n",
    "        # close the file before scanning so the content is flushed to disk\n",
    "        with fp:\n",
    "            fp.write(content)\n",
    "        secrets = SecretsCollection()\n",
    "        with transient_settings({'plugins_used': plugins, 'filters_used': filters}):\n",
    "            secrets.scan_file(fp.name)\n",
    "    finally:\n",
    "        # delete=False above, so the file has to be removed explicitly\n",
    "        os.unlink(fp.name)\n",
    "\n",
    "    # a single file was scanned, so only the first entry of secrets.data is used\n",
    "    secrets_set = list(secrets.data.values())\n",
    "    result = []\n",
    "    if secrets_set:\n",
    "        for secret in secrets_set[0]:\n",
    "            start_index = content.index(secret.secret_value)\n",
    "            result.append({\n",
    "                'type': secret.type,\n",
    "                'secret_value': secret.secret_value,\n",
    "                'start_index': start_index,\n",
    "                'end_index': start_index + len(secret.secret_value),\n",
    "            })\n",
    "    return result\n",
    "\n",
    "\n",
    "def scan_secrets_batch(examples):\n",
    "    \"\"\"Scan a batch of examples from a dataset for secret keys\n",
    "\n",
    "    Args:\n",
    "        examples: batch whose \"content\" entry holds the texts to scan\n",
    "    Returns:\n",
    "        dict: three lists with one entry per example:\n",
    "        - secrets: (str) str() of the list of secret values, \"\" if none found\n",
    "        - types: (str) str() of the list of secret types, \"\" if none found\n",
    "        - has_secrets: (bool) whether the example contains a secret\"\"\"\n",
    "\n",
    "    list_secrets = []\n",
    "    list_types = []\n",
    "    has_secrets = []\n",
    "    for text in examples[\"content\"]:\n",
    "        output = scan_str_content(text, suffix=\".txt\")\n",
    "        if output:\n",
    "            # to add this in datasets we need same number of samples in each row\n",
    "            # so each per-example list is saved as str instead of list\n",
    "            list_secrets.append(str([e['secret_value'] for e in output]))\n",
    "            list_types.append(str([e['type'] for e in output]))\n",
    "        else:\n",
    "            list_secrets.append(\"\")\n",
    "            list_types.append(\"\")\n",
    "        has_secrets.append(bool(output))\n",
    "    return {\"secrets\": list_secrets, \"types\": list_types, \"has_secrets\": has_secrets}\n",
    "\n",
    "\n",
    "def scan_secrets_batch_viz(examples):\n",
    "    \"\"\"Scan every example and keep only those with at least one detection\n",
    "\n",
    "    Args:\n",
    "        examples: object whose \"content\" entry yields the texts to scan\n",
    "    Returns:\n",
    "        list: one dict per example with detections, with keys \"id\" (position\n",
    "            of the example in examples[\"content\"]) and \"secrets\" (the output\n",
    "            of scan_str_content for that example)\"\"\"\n",
    "\n",
    "    scanned = (\n",
    "        (idx, scan_str_content(text, suffix=\".txt\"))\n",
    "        for idx, text in enumerate(examples[\"content\"])\n",
    "    )\n",
    "    return [\n",
    "        {\"id\": idx, \"secrets\": found}\n",
    "        for idx, found in scanned\n",
    "        if found\n",
    "    ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "id": "5a61da0c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# scan the whole dataset sequentially (no multiprocessing here), takes a few seconds\n",
    "result_custom2 = scan_secrets_batch_viz(ds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "b149b9bb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number non gibberish secrets: 2505\n",
      "Number gibberish secrets: 42\n"
     ]
    }
   ],
   "source": [
    "# Tally the Base64 high-entropy detections in result_custom2 according to\n",
    "# whether Detector.is_gibberish flags their secret value.\n",
    "# Both counters are reset here so nothing is carried over from an earlier cell.\n",
    "count, gibberish = 0, 0\n",
    "for detection in result_custom2:\n",
    "    for d in detection[\"secrets\"]:\n",
    "        # other detector types are not part of this comparison\n",
    "        if d[\"type\"] != \"Base64 High Entropy String\":\n",
    "            continue\n",
    "        if Detector.is_gibberish(d[\"secret_value\"]):\n",
    "            gibberish += 1\n",
    "        else:\n",
    "            count += 1\n",
    "print(f\"Number non gibberish secrets: {count}\")\n",
    "print(f\"Number gibberish secrets: {gibberish}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9ac6fae1",
   "metadata": {},
   "source": [
    "Results of detection for different limits:\n",
    "\n",
    "limit=5.5\n",
    "- Number of non-gibberish secrets: 26\n",
    "- Number of gibberish secrets: 2\n",
    "\n",
    "limit=5 (default)\n",
    "- Number of non-gibberish secrets: 26\n",
    "- Number of gibberish secrets: 12\n",
    "\n",
    "limit=4.5\n",
    "- Number of non-gibberish secrets: 52\n",
    "- Number of gibberish secrets: 13\n",
    "\n",
    "limit=4\n",
    "- Number of non-gibberish secrets: 2505\n",
    "- Number of gibberish secrets: 42\n",
    "\n",
    "=> Changing the limit doesn't help, so let's just use the gibberish detector on top instead."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.4 ('venv')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "vscode": {
   "interpreter": {
    "hash": "fd8fde6f83dada9276d12fdb71d773558994168ed1b3bea457b8db38c02aa2e1"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
